{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  数据清洗和分析，同时进行特征处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 396,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np # numerical computing extension for Python\n",
    "import pandas as pd # data processing and I/O\n",
    "import matplotlib.pyplot as plt # plotting / visualisation\n",
    "import seaborn as sns # data visualisation\n",
    "color = sns.color_palette()\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "#pd.options.mode.chained_assignment = None  # default='warn'\n",
    "\n",
    "# text feature extraction, used for the `features` field\n",
    "from sklearn.feature_extraction.text import  CountVectorizer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "# CountVectorizer produces sparse features; storing the encoded result as a sparse matrix lets xgboost process it more efficiently\n",
    "from scipy import sparse\n",
    "\n",
    "\n",
    "#from sklearn.cross_validation import StratifiedKFold\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "# encode categorical features\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "#from MeanEncoder import MeanEncoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "加载训练数据，查看前几行对数据初步了解"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 397,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>building_id</th>\n",
       "      <th>created</th>\n",
       "      <th>description</th>\n",
       "      <th>display_address</th>\n",
       "      <th>features</th>\n",
       "      <th>interest_level</th>\n",
       "      <th>latitude</th>\n",
       "      <th>listing_id</th>\n",
       "      <th>longitude</th>\n",
       "      <th>manager_id</th>\n",
       "      <th>photos</th>\n",
       "      <th>price</th>\n",
       "      <th>street_address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1.5</td>\n",
       "      <td>3</td>\n",
       "      <td>53a5b119ba8f7b61d4e010512e0dfc85</td>\n",
       "      <td>2016-06-24 07:54:24</td>\n",
       "      <td>A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...</td>\n",
       "      <td>Metropolitan Avenue</td>\n",
       "      <td>[]</td>\n",
       "      <td>medium</td>\n",
       "      <td>40.7145</td>\n",
       "      <td>7211212</td>\n",
       "      <td>-73.9425</td>\n",
       "      <td>5ba989232d0489da1b5f2c45f6688adc</td>\n",
       "      <td>[https://photos.renthop.com/2/7211212_1ed4542e...</td>\n",
       "      <td>3000</td>\n",
       "      <td>792 Metropolitan Avenue</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10000</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>c5c8a357cba207596b04d1afd1e4f130</td>\n",
       "      <td>2016-06-12 12:19:27</td>\n",
       "      <td></td>\n",
       "      <td>Columbus Avenue</td>\n",
       "      <td>[Doorman, Elevator, Fitness Center, Cats Allow...</td>\n",
       "      <td>low</td>\n",
       "      <td>40.7947</td>\n",
       "      <td>7150865</td>\n",
       "      <td>-73.9667</td>\n",
       "      <td>7533621a882f71e25173b27e3139d83d</td>\n",
       "      <td>[https://photos.renthop.com/2/7150865_be3306c5...</td>\n",
       "      <td>5465</td>\n",
       "      <td>808 Columbus Avenue</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100004</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>c3ba40552e2120b0acfc3cb5730bb2aa</td>\n",
       "      <td>2016-04-17 03:26:41</td>\n",
       "      <td>Top Top West Village location, beautiful Pre-w...</td>\n",
       "      <td>W 13 Street</td>\n",
       "      <td>[Laundry In Building, Dishwasher, Hardwood Flo...</td>\n",
       "      <td>high</td>\n",
       "      <td>40.7388</td>\n",
       "      <td>6887163</td>\n",
       "      <td>-74.0018</td>\n",
       "      <td>d9039c43983f6e564b1482b273bd7b01</td>\n",
       "      <td>[https://photos.renthop.com/2/6887163_de85c427...</td>\n",
       "      <td>2850</td>\n",
       "      <td>241 W 13 Street</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100007</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>28d9ad350afeaab8027513a3e52ac8d5</td>\n",
       "      <td>2016-04-18 02:22:02</td>\n",
       "      <td>Building Amenities - Garage - Garden - fitness...</td>\n",
       "      <td>East 49th Street</td>\n",
       "      <td>[Hardwood Floors, No Fee]</td>\n",
       "      <td>low</td>\n",
       "      <td>40.7539</td>\n",
       "      <td>6888711</td>\n",
       "      <td>-73.9677</td>\n",
       "      <td>1067e078446a7897d2da493d2f741316</td>\n",
       "      <td>[https://photos.renthop.com/2/6888711_6e660cee...</td>\n",
       "      <td>3275</td>\n",
       "      <td>333 East 49th Street</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100013</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>2016-04-28 01:32:41</td>\n",
       "      <td>Beautifully renovated 3 bedroom flex 4 bedroom...</td>\n",
       "      <td>West 143rd Street</td>\n",
       "      <td>[Pre-War]</td>\n",
       "      <td>low</td>\n",
       "      <td>40.8241</td>\n",
       "      <td>6934781</td>\n",
       "      <td>-73.9493</td>\n",
       "      <td>98e13ad4b495b9613cef886d79a6291f</td>\n",
       "      <td>[https://photos.renthop.com/2/6934781_1fa4b41a...</td>\n",
       "      <td>3350</td>\n",
       "      <td>500 West 143rd Street</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        bathrooms  bedrooms                       building_id  \\\n",
       "10            1.5         3  53a5b119ba8f7b61d4e010512e0dfc85   \n",
       "10000         1.0         2  c5c8a357cba207596b04d1afd1e4f130   \n",
       "100004        1.0         1  c3ba40552e2120b0acfc3cb5730bb2aa   \n",
       "100007        1.0         1  28d9ad350afeaab8027513a3e52ac8d5   \n",
       "100013        1.0         4                                 0   \n",
       "\n",
       "                    created  \\\n",
       "10      2016-06-24 07:54:24   \n",
       "10000   2016-06-12 12:19:27   \n",
       "100004  2016-04-17 03:26:41   \n",
       "100007  2016-04-18 02:22:02   \n",
       "100013  2016-04-28 01:32:41   \n",
       "\n",
       "                                              description  \\\n",
       "10      A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...   \n",
       "10000                                                       \n",
       "100004  Top Top West Village location, beautiful Pre-w...   \n",
       "100007  Building Amenities - Garage - Garden - fitness...   \n",
       "100013  Beautifully renovated 3 bedroom flex 4 bedroom...   \n",
       "\n",
       "            display_address  \\\n",
       "10      Metropolitan Avenue   \n",
       "10000       Columbus Avenue   \n",
       "100004          W 13 Street   \n",
       "100007     East 49th Street   \n",
       "100013    West 143rd Street   \n",
       "\n",
       "                                                 features interest_level  \\\n",
       "10                                                     []         medium   \n",
       "10000   [Doorman, Elevator, Fitness Center, Cats Allow...            low   \n",
       "100004  [Laundry In Building, Dishwasher, Hardwood Flo...           high   \n",
       "100007                          [Hardwood Floors, No Fee]            low   \n",
       "100013                                          [Pre-War]            low   \n",
       "\n",
       "        latitude  listing_id  longitude                        manager_id  \\\n",
       "10       40.7145     7211212   -73.9425  5ba989232d0489da1b5f2c45f6688adc   \n",
       "10000    40.7947     7150865   -73.9667  7533621a882f71e25173b27e3139d83d   \n",
       "100004   40.7388     6887163   -74.0018  d9039c43983f6e564b1482b273bd7b01   \n",
       "100007   40.7539     6888711   -73.9677  1067e078446a7897d2da493d2f741316   \n",
       "100013   40.8241     6934781   -73.9493  98e13ad4b495b9613cef886d79a6291f   \n",
       "\n",
       "                                                   photos  price  \\\n",
       "10      [https://photos.renthop.com/2/7211212_1ed4542e...   3000   \n",
       "10000   [https://photos.renthop.com/2/7150865_be3306c5...   5465   \n",
       "100004  [https://photos.renthop.com/2/6887163_de85c427...   2850   \n",
       "100007  [https://photos.renthop.com/2/6888711_6e660cee...   3275   \n",
       "100013  [https://photos.renthop.com/2/6934781_1fa4b41a...   3350   \n",
       "\n",
       "                 street_address  \n",
       "10      792 Metropolitan Avenue  \n",
       "10000       808 Columbus Avenue  \n",
       "100004          241 W 13 Street  \n",
       "100007     333 East 49th Street  \n",
       "100013    500 West 143rd Street  "
      ]
     },
     "execution_count": 397,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the training listings from a JSON file given by a relative path (resolved against the working directory)\n",
    "train_data = pd.read_json(\"RentListingInquries_train.json\")\n",
    "# Preview the first rows to get a first impression of the columns\n",
    "train_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看训练数据和测试数据的基本信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 398,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 49352 entries, 10 to 99994\n",
      "Data columns (total 15 columns):\n",
      "bathrooms          49352 non-null float64\n",
      "bedrooms           49352 non-null int64\n",
      "building_id        49352 non-null object\n",
      "created            49352 non-null object\n",
      "description        49352 non-null object\n",
      "display_address    49352 non-null object\n",
      "features           49352 non-null object\n",
      "interest_level     49352 non-null object\n",
      "latitude           49352 non-null float64\n",
      "listing_id         49352 non-null int64\n",
      "longitude          49352 non-null float64\n",
      "manager_id         49352 non-null object\n",
      "photos             49352 non-null object\n",
      "price              49352 non-null int64\n",
      "street_address     49352 non-null object\n",
      "dtypes: float64(3), int64(3), object(9)\n",
      "memory usage: 6.0+ MB\n"
     ]
    }
   ],
   "source": [
    "# Summarise the training set: row count, column dtypes and non-null counts\n",
    "train_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 399,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 74659 entries, 0 to 99999\n",
      "Data columns (total 14 columns):\n",
      "bathrooms          74659 non-null float64\n",
      "bedrooms           74659 non-null int64\n",
      "building_id        74659 non-null object\n",
      "created            74659 non-null object\n",
      "description        74659 non-null object\n",
      "display_address    74659 non-null object\n",
      "features           74659 non-null object\n",
      "latitude           74659 non-null float64\n",
      "listing_id         74659 non-null int64\n",
      "longitude          74659 non-null float64\n",
      "manager_id         74659 non-null object\n",
      "photos             74659 non-null object\n",
      "price              74659 non-null int64\n",
      "street_address     74659 non-null object\n",
      "dtypes: float64(3), int64(3), object(8)\n",
      "memory usage: 8.5+ MB\n"
     ]
    }
   ],
   "source": [
    "# Load the test listings from a JSON file (relative path) and print the dtype / non-null summary\n",
    "test_data = pd.read_json(\"RentListingInquries_test.json\")\n",
    "test_data.info()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "训练数据和测试数据分别有49352和74659行，均无空值。其中有数值型变量，也有非数值变量，需进行处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "探索数据，逐个变量查看分析。首先看目标变量 interest_level"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## interest_level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 400,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAELCAYAAAARNxsIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGQpJREFUeJzt3X20XXV95/H3h/DYqgXk6iDBhtHM\nUnQ06hWoWJdPCwIzY9CKwtSSUlajDviwWjuiaxZYlLXqssqIRdbgGIGOFagPQ8aJxgxiEcpDgoZA\nQFZugUqEQjCgoC0U/M4f53flEE5yL2Gfe3KT92uts87e3/3b+/w2Z10+2Xv/zt6pKiRJ6sIuo+6A\nJGnHYahIkjpjqEiSOmOoSJI6Y6hIkjpjqEiSOmOoSJI6Y6hIkjpjqEiSOrPrqDsw0/bbb7+aN2/e\nqLshSbPK9ddff19VjU3VbqcLlXnz5rF69epRd0OSZpUk/ziddp7+kiR1ZmihkmTPJNcluSHJuiR/\n3urnJ7k9yZr2WtDqSXJ2kokka5O8sm9bi5Osb6/FffVXJbmxrXN2kgxrfyRJUxvm6a+HgTdW1UNJ\ndgOuTPKttuzPquqrm7U/CpjfXocC5wKHJtkXOB0YBwq4Psmyqrq/tVkCXAMsBxYC30KSNBJDO1Kp\nnofa7G7ttbX77C8CLmzrXQPsnWR/4EhgZVVtakGyEljYlj2rqq6u3v37LwSOGdb+SJKmNtRrKknm\nJFkD3EsvGK5ti85sp7jOSrJHqx0A3Nm3+oZW21p9w4D6oH4sSbI6yeqNGzc+7f2SJA021FCpqseq\nagEwFzgkyUuBjwAvAl4N7At8uDUfdD2ktqE+qB/nVdV4VY2PjU05Ik6StI1mZPRXVT0AfA9YWFV3\nt1NcDwNfAg5pzTYAB/atNhe4a4r63AF1SdKIDHP011iSvdv0XsCbgR+1ayG0kVrHADe1VZYBJ7RR\nYIcBP6uqu4EVwBFJ9kmyD3AEsKItezDJYW1bJwCXDmt/JElTG+bor/2BC5LMoRdel1TVN5N8N8kY\nvdNXa4D3tPbLgaOBCeCXwIkAVbUpyceBVa3dGVW1qU2/Fzgf2IveqC9HfknSCKU3cGrnMT4+Xv6i\nfsf34zP+/ai7sFN4/mk3jroLmiFJrq+q8ana+Yt6SVJnDBVJUmcMFUlSZwwVSVJnDBVJUmcMFUlS\nZwwVSVJnDBVJUmcMFUlSZwwVSVJnDBVJUmcMFUlSZwwVSVJnDBVJUmcMFUlSZwwVSVJnDBVJUmcM\nFUlSZwwVSVJnDBVJUmcMFUlSZ4YWKkn2THJdkhuSrEvy561+UJJrk6xPcnGS3Vt9jzY/0ZbP69vW\nR1r91iRH9tUXttpEklOHtS+SpOkZ5pHKw8Abq+rlwAJgYZLDgE8CZ1XVfOB+4KTW/iTg/qp6IXBW\na0eSg4HjgJcAC4HPJ5mTZA5wDnAUcDBwfGsrSRqRoYVK9TzUZndrrwLeCHy11S8AjmnTi9o8bfmb\nkqTVL6qqh6vqdmACOKS9Jqrqtqp6BLiotZUkjchQr6m0I4o1wL3ASuAfgAeq6tHWZANwQJs+ALgT\noC3/GfDs/vpm62ypLkkakaGGSlU9VlULgLn0jixePKhZe88Wlj3V+pMkWZJkdZLVGzdunLrjkqRt\nMiOjv6rqAeB7wGHA3kl2bYvmAne16Q3AgQBt+W8Bm/rrm62zpfqgzz+vqsaranxsbKyLXZIkDTDM\n0V9jSfZu03sBbwZuAS4H3t6aLQYubdPL2jxt+Xerqlr9uDY67CBgPnAdsAqY30aT7U7vYv6yYe2P\nJGlqu07dZJvtD1zQRmntAlxSVd9McjNwUZJPAD8EvtjafxH46yQT9I5QjgOoqnVJLgFuBh4FTq6q\nxwCSnAKsAOYAS6tq3RD3R5I0haGFSl
WtBV4xoH4bvesrm9f/BTh2C9s6EzhzQH05sPxpd1aS1Al/\nUS9J6oyhIknqjKEiSeqMoSJJ6oyhIknqjKEiSeqMoSJJ6oyhIknqjKEiSeqMoSJJ6oyhIknqjKEi\nSeqMoSJJ6oyhIknqjKEiSeqMoSJJ6oyhIknqjKEiSeqMoSJJ6oyhIknqjKEiSerM0EIlyYFJLk9y\nS5J1ST7Q6h9L8pMka9rr6L51PpJkIsmtSY7sqy9stYkkp/bVD0pybZL1SS5Osvuw9keSNLVhHqk8\nCvxpVb0YOAw4OcnBbdlZVbWgvZYDtGXHAS8BFgKfTzInyRzgHOAo4GDg+L7tfLJtaz5wP3DSEPdH\nkjSFoYVKVd1dVT9o0w8CtwAHbGWVRcBFVfVwVd0OTACHtNdEVd1WVY8AFwGLkgR4I/DVtv4FwDHD\n2RtJ0nTMyDWVJPOAVwDXttIpSdYmWZpkn1Y7ALizb7UNrbal+rOBB6rq0c3qkqQRGXqoJHkG8DXg\ng1X1c+Bc4AXAAuBu4NOTTQesXttQH9SHJUlWJ1m9cePGp7gHkqTpGmqoJNmNXqB8uaq+DlBV91TV\nY1X1K+AL9E5vQe9I48C+1ecCd22lfh+wd5JdN6s/SVWdV1XjVTU+NjbWzc5Jkp5kmKO/AnwRuKWq\nPtNX37+v2VuBm9r0MuC4JHskOQiYD1wHrALmt5Feu9O7mL+sqgq4HHh7W38xcOmw9keSNLVdp26y\nzQ4H/gC4McmaVvsovdFbC+idqroDeDdAVa1LcglwM72RYydX1WMASU4BVgBzgKVVta5t78PARUk+\nAfyQXohJkkZkaKFSVVcy+LrH8q2scyZw5oD68kHrVdVtPH76TJI0Yv6iXpLUGUNFktQZQ0WS1BlD\nRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZQ0WS\n1BlDRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZQ0WS1JmhhUqSA5NcnuSWJOuSfKDV902yMsn69r5P\nqyfJ2UkmkqxN8sq+bS1u7dcnWdxXf1WSG9s6ZyfJsPZHkjS1YR6pPAr8aVW9GDgMODnJwcCpwGVV\nNR+4rM0DHAXMb68lwLnQCyHgdOBQ4BDg9Mkgam2W9K23cIj7I0mawtBCparurqoftOkHgVuAA4BF\nwAWt2QXAMW16EXBh9VwD7J1kf+BIYGVVbaqq+4GVwMK27FlVdXVVFXBh37YkSSMwI9dUkswDXgFc\nCzy3qu6GXvAAz2nNDgDu7FttQ6ttrb5hQF2SNCLTCpUkl02ntoV1nwF8DfhgVf18a00H1Gob6oP6\nsCTJ6iSrN27cOFWXJUnbaKuhkmTPdk1jvyT7tIvs+7Yjj+dNtfEku9ELlC9X1ddb+Z526or2fm+r\nbwAO7Ft9LnDXFPW5A+pPUlXnVdV4VY2PjY1N1W1J0jaa6kjl3cD1wIva++TrUuCcra3YRmJ9Ebil\nqj7Tt2gZMDmCa3Hb1mT9hDYK7DDgZ+302ArgiBZq+wBHACvasgeTHNY+64S+bUmSRmDXrS2sqs8C\nn03yvqr63FPc9uHAHwA3JlnTah8F/gK4JMlJwI+BY9uy5cDRwATwS+DE1odNST4OrGrtzqiqTW36\nvcD5wF7At9pLkjQiWw2VSVX1uSSvAeb1r1NVF25lnSsZfN0D4E0D2hdw8ha2tRRYOqC+Gnjp1vou\nSZo50wqVJH8NvABYAzzWypPDeCVJAqYZKsA4cHA7mpAkaaDp/k7lJuDfDLMjkqTZb7pHKvsBNye5\nDnh4slhVbxlKryRJs9J0Q+Vjw+yEJGnHMN3RX3837I5Ikma/6Y7+epDHb4GyO7Ab8IuqetawOiZJ\nmn2me6TyzP75JMfQuw29JEm/tk13Ka6q/w28seO+SJJmueme/npb3+wu9H634m9WJElPMN3RX/+p\nb/
pR4A56D9WSJOnXpntN5cRhd0SSNPtN9yFdc5N8I8m9Se5J8rUkc6deU5K0M5nuhfov0XveyfPo\nPbL3/7SaJEm/Nt1QGauqL1XVo+11PuAjFCVJTzDdULkvybuSzGmvdwE/HWbHJEmzz3RD5Y+AdwD/\nBNwNvJ32ZEZJkiZNd0jxx4HFVXU/QJJ9gb+kFzaSJAHTP1J52WSgQO+58cArhtMlSdJsNd1Q2SXJ\nPpMz7Uhlukc5kqSdxHSD4dPA3yf5Kr3bs7wDOHNovZIkzUrT/UX9hUlW07uJZIC3VdXNQ+2ZJGnW\nmfZdiqvq5qr6q6r63HQCJcnS9gv8m/pqH0vykyRr2uvovmUfSTKR5NYkR/bVF7baRJJT++oHJbk2\nyfokFyfZfbr7Ikkajm269f00nQ8sHFA/q6oWtNdygCQHA8cBL2nrfH7yNzHAOcBRwMHA8a0twCfb\ntuYD9wMnDXFfJEnTMLRQqaorgE3TbL4IuKiqHq6q24EJeg8BOwSYqKrbquoR4CJgUZLQOxX31bb+\nBcAxne6AJOkpG+aRypackmRtOz02OaLsAODOvjYbWm1L9WcDD1TVo5vVB0qyJMnqJKs3btzY1X5I\nkjYz06FyLvACYAG9X+Z/utUzoG1tQ32gqjqvqsaranxszFuWSdKwzOhvTarqnsnpJF8AvtlmNwAH\n9jWdC9zVpgfV7wP2TrJrO1rpby9JGpEZPVJJsn/f7FuByZFhy4DjkuyR5CBgPnAdsAqY30Z67U7v\nYv6yqirgcnr3IANYDFw6E/sgSdqyoR2pJPkK8HpgvyQbgNOB1ydZQO9U1R3AuwGqal2SS4Cb6T2u\n+OSqeqxt5xRgBTAHWFpV69pHfBi4KMkngB8CXxzWvkiSpmdooVJVxw8ob/F//FV1JgN+pd+GHS8f\nUL+N3ugwSdJ2YhSjvyRJOyhDRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZ\nQ0WS1BlDRZLUGUNFktQZQ0WS1BlDRZLUGUNFktQZQ0WS1BlDRZLUmaE9+XFH8Ko/u3DUXdjhXf+p\nE0bdBUkd8khFktQZQ0WS1BlDRZLUmaGFSpKlSe5NclNfbd8kK5Osb+/7tHqSnJ1kIsnaJK/sW2dx\na78+yeK++quS3NjWOTtJhrUvkqTpGeaRyvnAws1qpwKXVdV84LI2D3AUML+9lgDnQi+EgNOBQ4FD\ngNMng6i1WdK33uafJUmaYUMLlaq6Ati0WXkRcEGbvgA4pq9+YfVcA+ydZH/gSGBlVW2qqvuBlcDC\ntuxZVXV1VRVwYd+2JEkjMtPXVJ5bVXcDtPfntPoBwJ197Ta02tbqGwbUB0qyJMnqJKs3btz4tHdC\nkjTY9nKhftD1kNqG+kBVdV5VjVfV+NjY2DZ2UZI0lZkOlXvaqSva+72tvgE4sK/dXOCuKepzB9Ql\nSSM006GyDJgcwbUYuLSvfkIbBXYY8LN2emwFcESSfdoF+iOAFW3Zg0kOa6O+TujbliRpRIZ2m5Yk\nXwFeD+yXZAO9UVx/AVyS5CTgx8Cxrfly4GhgAvglcCJAVW1K8nFgVWt3RlVNXvx/L70RZnsB32ov\nSdIIDS1Uqur4LSx604C2BZy8he0sBZYOqK8GXvp0+ihJ6tb2cqFekrQDMFQkSZ0xVCRJnTFUJEmd\nMVQkSZ0xVCRJnTFUJEmdMVQkSZ0xVCRJnTFUJEmdMVQkSZ0xVCRJnTFUJEmdMVQkSZ0xVCRJnTFU\nJEmdMVQkSZ0xVCRJnTFUJEmdMVQkSZ0xVCRJnRlJqCS5I8mNSdYkWd1q+yZZmWR9e9+n1ZPk7CQT\nSdYmeWXfdha39uuTLB7FvkiSHjfKI5U3VNWCqhpv86cCl1XVfOCyNg9wFDC/vZYA50IvhIDTgUOB\nQ4DTJ4NIkjQa29Ppr0XABW36AuCYvvqF1XMNsHeS/YEjgZVVtamq
7gdWAgtnutOSpMeNKlQK+E6S\n65MsabXnVtXdAO39Oa1+AHBn37obWm1LdUnSiOw6os89vKruSvIcYGWSH22lbQbUaiv1J2+gF1xL\nAJ7//Oc/1b5KkqZpJEcqVXVXe78X+Aa9ayL3tNNatPd7W/MNwIF9q88F7tpKfdDnnVdV41U1PjY2\n1uWuSJL6zHioJPnNJM+cnAaOAG4ClgGTI7gWA5e26WXACW0U2GHAz9rpsRXAEUn2aRfoj2g1SdKI\njOL013OBbySZ/Py/qapvJ1kFXJLkJODHwLGt/XLgaGAC+CVwIkBVbUrycWBVa3dGVW2aud2QJG1u\nxkOlqm4DXj6g/lPgTQPqBZy8hW0tBZZ23UdJ0rbZnoYUS5JmOUNFktSZUQ0plqQtOvxzh4+6Czu8\nq9531VC265GKJKkzhookqTOGiiSpM4aKJKkzhookqTOGiiSpM4aKJKkzhookqTOGiiSpM4aKJKkz\nhookqTOGiiSpM4aKJKkzhookqTOGiiSpM4aKJKkzhookqTOGiiSpM7M+VJIsTHJrkokkp466P5K0\nM5vVoZJkDnAOcBRwMHB8koNH2ytJ2nnN6lABDgEmquq2qnoEuAhYNOI+SdJOa7aHygHAnX3zG1pN\nkjQCu466A09TBtTqSY2SJcCSNvtQkluH2qvR2g+4b9SdmK785eJRd2F7Mqu+OwBOH/QnuNOaVd9f\n3v+Uv7vfnk6j2R4qG4AD++bnAndt3qiqzgPOm6lOjVKS1VU1Pup+6Knzu5vd/P56Zvvpr1XA/CQH\nJdkdOA5YNuI+SdJOa1YfqVTVo0lOAVYAc4ClVbVuxN2SpJ3WrA4VgKpaDiwfdT+2IzvFab4dlN/d\n7Ob3B6TqSde1JUnaJrP9mookaTtiqOxAknwvyXibXp5k71H3SU+U5KFR90FPTZJ5SW4aUD8jyZun\nWPdjST40vN5tf2b9NRUNVlVHj7oP0o6sqk4bdR+2Rx6pjFj7V9CPkvzPJDcl+XKSNye5Ksn6JIck\n+c0kS5OsSvLDJIvaunsluSjJ2iQXA3v1bfeOJPtt/q+sJB9K8rE2/b0kZyW5IsktSV6d5Ovtcz8x\n0/8tdibp+VT7zm9M8s5W/3ySt7TpbyRZ2qZP8jsZqTlJvpBkXZLvtL+985O8HSDJ0e3v+MokZyf5\nZt+6B7e/tduSvH9E/Z8xHqlsH14IHEvvV/+rgP8MvBZ4C/BR4Gbgu1X1R+2U1nVJ/h/wbuCXVfWy\nJC8DfrANn/1IVb0uyQeAS4FXAZuAf0hyVlX99OnunAZ6G7AAeDm9X2KvSnIFcAXwu/R+b3UAsH9r\n/1p697bTaMwHjq+qP05yCfB7kwuS7An8D+B1VXV7kq9stu6LgDcAzwRuTXJuVf3rTHV8pnmksn24\nvapurKpfAeuAy6o3LO9GYB5wBHBqkjXA94A9gecDrwP+F0BVrQXWbsNnT/5Y9EZgXVXdXVUPA7fx\nxLsVqFuvBb5SVY9V1T3A3wGvBr4P/G672/bNwD1J9gd+B/j7kfVWt1fVmjZ9Pb2/y0kvAm6rqtvb\n/Oah8n+r6uGqug+4F3juUHs6Yh6pbB8e7pv+Vd/8r+h9R48Bv1dVT7hnWRIYcK+zzTzKE//xsOcW\nPrv/c/s/W8Mx8MZLVfWTJPsAC+kdtewLvAN4qKoenMH+6Yn6/zYeo+9UM1v4Lrey7g79d+WRyuyw\nAnhfWookeUWrXwH8fqu9FHjZgHXvAZ6T5NlJ9gD+4wz0V1O7AnhnkjlJxugddV7Xll0NfLC1+T7w\nofau7dOPgH+bZF6bf+foujJ6O3Ri7kA+Dvx3YG0LljvohcO5wJeSrAXW8Pj/lH6tqv41yRnAtcDt\n9P4ANHrfoHdK6wZ6R5v/tar+qS37PnBEVU0k+Ud6RyuGynaqqv45yX8Bvp3kPgb8He5M/EW9JD1N\nSZ5RVQ+1f/SdA6yvqrNG3a9R
8PSXJD19f9wG0qwDfoveaLCdkkcqkqTOeKQiSeqMoSJJ6oyhIknq\njKEiSeqMoSIBSaa8BUqSDyb5jSH3Y0GSrd5hOskfJvmrjj+3821q52SoSEBVvWYazT4IPKVQSTLn\nKXZlAeBjCzRrGSoSjz88K8nr223Kv9puZf7ldpv69wPPAy5Pcnlre0SSq5P8IMnfJnlGq9+R5LQk\nVwLHJnlBkm8nuT7J95O8qLU7tt36/ob2+IHdgTPo3b5lzeTt8Kfo91iSr7XHIqxKcniSXVof9u5r\nN5HkuYPad/4fUzs1b9MiPdkrgJcAdwFXAYdX1dlJ/gR4Q1Xdl2Q/4L8Bb66qXyT5MPAn9EIB4F+q\n6rUASS4D3lNV65McCnweeCNwGnBku4nk3lX1SJLTgPGqOmWaff0scFZVXZnk+cCKqnpxkkuBt9K7\njc+hwB1VdU+Sv9m8PfDip/nfS/o1Q0V6suuqagNA+5X0PODKzdocBhwMXNXu87k7vRtBTrq4rf8M\n4DXA37Z2AHu096uA89vzOb6+jX19M72HQE3OPyvJM9vnnwZ8CThusj9baS91wlCRnmw6tyoPsLKq\njt/CNn7R3ncBHqiqBZs3qKr3tKOI/wCsSfKkNtOwC/A7VfXPT+hccjXwwnYH5GOAT0zRfhs+Wnoy\nr6lI0/cgvaf3AVwDHJ7khQBJfiPJv9t8har6OXB7kmNbuyR5eZt+QVVd2551fh+9h6L1f8Z0fAf4\n9amyyWBqD3n7BvAZ4Ja+J3gObC91xVCRpu884FtJLq+qjcAfAl9pjx64ht4TAAf5feCkJDfQu+Hg\nolb/VHrPp7+J3rNTbgAup3d6aloX6oH3A+NJ1ia5GXhP37KLgXfx+KmvqdpLT5s3lJQkdcYjFUlS\nZ7xQL22nkpwIfGCz8lVVdfIo+iNNh6e/JEmd8fSXJKkzhookqTOGiiSpM4aKJKkzhookqTP/H0Mg\nXhgvx4dVAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1016997f0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# interest_level is a non-numeric column, so plot how many listings fall into each class.\n",
    "# The column is passed through the x= keyword: seaborn 0.11 deprecated positional data vectors,\n",
    "# and from 0.12 on the only positional argument is `data`, so countplot(series) no longer means x=series.\n",
    "sns.countplot(x=\"interest_level\", data=train_data)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## bathrooms、bedrooms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 401,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEKCAYAAADaa8itAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAHVRJREFUeJzt3X+QXWWd5/H3h0AARUiAhsEkbHAm\nowZWA/SEOExNIVhJQIfgCFNhVAKDFXWDI+XsDmF2SxRMjWztyIhCZqMEEmUITBCJbDBm+bGWu/xI\nB0JCCExaQNOTSBoTfigzYQPf/eM8DWeb293n3n5OXxo+r6pb95zveZ7vfU76dn9zfisiMDMzy2Gf\ndg/AzMzeOlxUzMwsGxcVMzPLxkXFzMyycVExM7NsXFTMzCwbFxUzM8vGRcXMzLJxUTEzs2z2bfcA\nRtrhhx8ekydPbvcwzMxGlfXr1z8bER1DtXvbFZXJkyfT1dXV7mGYmY0qkn5RpZ13f5mZWTYuKmZm\nlk3tRUXSGEkPS7ojzR8j6QFJWyXdLGlsiu+f5rvT8smlHJem+BOSZpXis1OsW9LCutfFzMwGNxJb\nKl8EtpTmrwSuiogpwG7gwhS/ENgdEb8HXJXaIWkqMBc4FpgNXJsK1RjgGuB0YCpwbmprZmZtUmtR\nkTQR+Cjw3TQv4FRgZWqyDDgrTc9J86Tlp6X2c4AVEbEnIp4CuoHp6dUdEU9GxMvAitTWzMzapO4t\nlb8H/hp4Nc0fBjwXEXvTfA8wIU1PALYBpOXPp/avxfv1GShuZmZtUltRkfQxYGdErC+HGzSNIZY1\nG280lvmSuiR19fb2DjJqMzMbjjq3VE4GzpT0NMWuqVMptlzGSeq7PmYisD1N9wCTANLyQ4Bd5Xi/\nPgPF3yAilkREZ0R0dnQMee2OmZm1qLaiEhGXRsTEiJhMcaD97oj4JHAPcHZqNg+4PU2vSvOk5XdH\nRKT43HR22DHAFOBBYB0wJZ1NNjZ9xqq61sfMzIbWjivqLwFWSPoa8DBwXYpfB3xPUjfFFspcgIjY\nLOkW4DFgL7AgIl4BkHQRsAYYAyyNiM0juiYNbL/mS1nyvHvBN7LkMTMbSSNSVCLiXuDeNP0kxZlb\n/dv8G3DOAP0XAYsaxFcDqzMO1czMhsFX1JuZWTYuKmZmlo2LipmZZeOiYmZm2biomJlZNi4qZmaW\njYuKmZll46JiZmbZuKiYmVk2LipmZpaNi4qZmWXjomJmZtm4qJiZWTYuKmZmlo2LipmZZeOiYmZm\n2biomJlZNrUVFUkHSHpQ0iOSNkv6aorfIOkpSRvSa1qKS9LVkrolbZR0QinXPElb02teKX6ipE2p\nz9WSVNf6mJnZ0Op8nPAe4NSI+I2k/YCfSbozLftPEbGyX/vTgSnpdRKwGDhJ0qHAZUAnEMB6Sasi\nYndqMx+4n+KxwrOBOzEzs7aobUslCr9Js/ulVwzSZQ6wPPW7Hxgn6ShgFrA2InalQrIWmJ2WHRwR\n90VEAMuBs+paHzMzG1qtx1QkjZG0AdhJURgeSIsWpV1cV0naP8UmANtK3XtSbLB4T4O4mZm1Sa1F\nJSJeiYhpwERguqTjgEuB9wF/ABwKXJKaNzoeEi3E30DSfEldkrp6e3ubXAszM6tqRM7+iojngHuB\n2RGxI+3i2gNcD0xPzXqASaVuE4HtQ8QnNog3+vwlEdEZEZ0dHR0Z1sjMzBqp8+yvDknj0vSBwEeA\nx9OxENKZWmcBj6Yuq4Dz0llgM4DnI2IHsAaYKWm8pPHATGBNWvaipBkp13nA7XWtj5mZDa3Os7+O\nApZJGkNRvG6JiDsk3S2pg2L31Qbgc6n9auAMoBt4CbgAICJ2SboCWJfaXR4Ru9L054EbgAMpzvry\nmV9mZm1UW1GJiI3A8Q3ipw7QPoAFAy
xbCixtEO8CjhveSM3MLBdfUW9mZtm4qJiZWTYuKmZmlo2L\nipmZZeOiYmZm2biomJlZNi4qZmaWjYuKmZll46JiZmbZuKiYmVk2LipmZpaNi4qZmWXjomJmZtm4\nqJiZWTYuKmZmlo2LipmZZeOiYmZm2biomJlZNrUVFUkHSHpQ0iOSNkv6aoofI+kBSVsl3SxpbIrv\nn+a70/LJpVyXpvgTkmaV4rNTrFvSwrrWxczMqqlzS2UPcGpEfBCYBsyWNAO4ErgqIqYAu4ELU/sL\ngd0R8XvAVakdkqYCc4FjgdnAtZLGSBoDXAOcDkwFzk1tzcysTWorKlH4TZrdL70COBVYmeLLgLPS\n9Jw0T1p+miSl+IqI2BMRTwHdwPT06o6IJyPiZWBFamtmZm1S6zGVtEWxAdgJrAV+DjwXEXtTkx5g\nQpqeAGwDSMufBw4rx/v1GSjeaBzzJXVJ6urt7c2xamZm1kCtRSUiXomIacBEii2L9zdqlt41wLJm\n443GsSQiOiOis6OjY+iBm5lZS0bk7K+IeA64F5gBjJO0b1o0EdiepnuASQBp+SHArnK8X5+B4mZm\n1iZ1nv3VIWlcmj4Q+AiwBbgHODs1mwfcnqZXpXnS8rsjIlJ8bjo77BhgCvAgsA6Yks4mG0txMH9V\nXetjZmZD23foJi07CliWztLaB7glIu6Q9BiwQtLXgIeB61L764DvSeqm2EKZCxARmyXdAjwG7AUW\nRMQrAJIuAtYAY4ClEbG5xvUxM7Mh1FZUImIjcHyD+JMUx1f6x/8NOGeAXIuARQ3iq4HVwx6smZll\n4SvqzcwsGxcVMzPLxkXFzMyycVExM7NsXFTMzCwbFxUzM8vGRcXMzLJxUTEzs2xcVMzMLBsXFTMz\ny8ZFxczMsnFRMTOzbFxUzMwsGxcVMzPLxkXFzMyycVExM7NsXFTMzCybOp9RP0nSPZK2SNos6Ysp\n/hVJ/yJpQ3qdUepzqaRuSU9ImlWKz06xbkkLS/FjJD0gaaukm9Oz6s3MrE3q3FLZC/xVRLwfmAEs\nkDQ1LbsqIqal12qAtGwucCwwG7hW0pj0jPtrgNOBqcC5pTxXplxTgN3AhTWuj5mZDaG2ohIROyLi\noTT9IrAFmDBIlznAiojYExFPAd0Uz7KfDnRHxJMR8TKwApgjScCpwMrUfxlwVj1rY2ZmVYzIMRVJ\nk4HjgQdS6CJJGyUtlTQ+xSYA20rdelJsoPhhwHMRsbdfvNHnz5fUJamrt7c3wxqZmVkjtRcVSQcB\ntwIXR8QLwGLgd4FpwA7g7/qaNugeLcTfGIxYEhGdEdHZ0dHR5BqYmVlV+9aZXNJ+FAXlxoj4AUBE\nPFNa/h3gjjTbA0wqdZ8IbE/TjeLPAuMk7Zu2VsrtzcysDeo8+0vAdcCWiPhGKX5UqdnHgUfT9Cpg\nrqT9JR0DTAEeBNYBU9KZXmMpDuaviogA7gHOTv3nAbfXtT5mZja0OrdUTgY+DWyStCHF/obi7K1p\nFLuqngY+CxARmyXdAjxGcebYgoh4BUDSRcAaYAywNCI2p3yXACskfQ14mKKImZlZm9RWVCLiZzQ+\n7rF6kD6LgEUN4qsb9YuIJynODjMzszcBX1FvZmbZuKiYmVk2LipmZpaNi4qZmWXjomJmZtlUKiqS\n7qoSMzOzt7dBTymWdADwDuDwdI+uvlOEDwbeXfPYzMxslBnqOpXPAhdTFJD1vF5UXqC4Hb2Zmdlr\nBi0qEfFN4JuSvhAR3xqhMZmZ2ShV6Yr6iPiWpD8EJpf7RMTymsZlZmajUKWiIul7FLer3wC8ksIB\nuKiYmdlrqt77qxOYmu4MbGZm1lDV61QeBX6nzoGYmdnoV3VL5XDgMUkPAnv6ghFxZi2jMjOzUalq\nUflKnYMwM7O3hqpnf/2vugdiZmajX9Wzv16kONsLYCywH/DbiDi4roGZmdnoU+lAfUS8KyIOTq8D\ngE
8A3x6sj6RJku6RtEXSZklfTPFDJa2VtDW9j09xSbpaUrekjZJOKOWal9pvlTSvFD9R0qbU52pJ\njZ40aWZmI6SluxRHxA+BU4dothf4q4h4PzADWCBpKrAQuCsipgB3pXmA04Ep6TUfWAxFEQIuA06i\neHTwZX2FKLWZX+o3u5X1MTOzPKru/vrT0uw+FNetDHrNSkTsAHak6RclbQEmAHOAU1KzZcC9wCUp\nvjxdC3O/pHGSjkpt10bErjSWtcBsSfcCB0fEfSm+HDgLuLPKOpmZWX5Vz/76k9L0XuBpiiJQiaTJ\nwPHAA8CRqeAQETskHZGaTQC2lbr1pNhg8Z4GcTMza5OqZ39d0OoHSDoIuBW4OCJeGOSwR6MF0UK8\n0RjmU+wm4+ijjx5qyGZm1qKqD+maKOk2STslPSPpVkkTK/Tbj6Kg3BgRP0jhZ9JuLdL7zhTvASaV\nuk8Etg8Rn9gg/gYRsSQiOiOis6OjY6hhm5lZi6oeqL8eWEXxXJUJwI9SbEDpTKzrgC0R8Y3SolVA\n3xlc84DbS/Hz0llgM4Dn026yNcBMSePTAfqZwJq07EVJM9JnnVfKZWZmbVD1mEpHRJSLyA2SLh6i\nz8nAp4FNkjak2N8AXwdukXQh8EvgnLRsNXAG0A28BFwAEBG7JF0BrEvtLu87aA98HrgBOJDiAL0P\n0puZtVHVovKspE8BN6X5c4FfD9YhIn5G4+MeAKc1aB/AggFyLQWWNoh3AccNNg4zMxs5VXd//QXw\nZ8CvKE4TPpu0JWFmZtan6pbKFcC8iNgNr12Q+N8oio2ZmRlQfUvlA30FBYrjHBTXnZiZmb2malHZ\np3RrlL4tlapbOWZm9jZRtTD8HfB/JK2kuMDwz4BFtY3KzMxGpapX1C+X1EVxE0kBfxoRj9U6MjMz\nG3Uq78JKRcSFxMzMBtTSre/NzMwacVExM7NsXFTMzCwbFxUzM8vGRcXMzLJxUTEzs2xcVMzMLBsX\nFTMzy8ZFxczMsnFRMTOzbGorKpKWStop6dFS7CuS/kXShvQ6o7TsUkndkp6QNKsUn51i3ZIWluLH\nSHpA0lZJN0saW9e6mJlZNXVuqdwAzG4QvyoipqXXagBJU4G5wLGpz7WSxkgaA1wDnA5MBc5NbQGu\nTLmmALuBC2tcFzMzq6C2ohIRPwV2VWw+B1gREXsi4imgG5ieXt0R8WREvAysAOZIEsUdk1em/suA\ns7KugJmZNa0dx1QukrQx7R7re/DXBGBbqU1Pig0UPwx4LiL29oubmVkbjXRRWQz8LjAN2EHx8C8o\nntHSX7QQb0jSfEldkrp6e3ubG7GZmVU2okUlIp6JiFci4lXgOxS7t6DY0phUajoR2D5I/FlgnKR9\n+8UH+twlEdEZEZ0dHR15VsbMzN5gRIuKpKNKsx8H+s4MWwXMlbS/pGOAKcCDwDpgSjrTayzFwfxV\nERHAPcDZqf884PaRWAczMxtY5Sc/NkvSTcApwOGSeoDLgFMkTaPYVfU08FmAiNgs6RaKJ0vuBRZE\nxCspz0XAGmAMsDQiNqePuARYIelrwMPAdXWti5mZVVNbUYmIcxuEB/zDHxGLgEUN4quB1Q3iT/L6\n7jMzM3sT8BX1ZmaWjYuKmZll46JiZmbZuKiYmVk2LipmZpaNi4qZmWXjomJmZtm4qJiZWTYuKmZm\nlo2LipmZZeOiYmZm2biomJlZNi4qZmaWjYuKmZll46JiZmbZuKiYmVk2LipmZpaNi4qZmWVTW1GR\ntFTSTkmPlmKHSloraWt6H5/iknS1pG5JGyWdUOozL7XfKmleKX6ipE2pz9WSVNe6mJlZNXVuqdwA\nzO4XWwjcFRFTgLvSPMDpwJT0mg8shqIIAZcBJ1E8j/6yvkKU2swv9ev/WWZmNsJqKyoR8VNgV7/w\nHGBZml4GnFWKL4/C/cA4SUcBs4C1EbErInYDa4HZadnBEXFfRASw
vJTLzMzaZKSPqRwZETsA0vsR\nKT4B2FZq15Nig8V7GsQbkjRfUpekrt7e3mGvhJmZNfZmOVDf6HhItBBvKCKWRERnRHR2dHS0OEQz\nMxvKSBeVZ9KuK9L7zhTvASaV2k0Etg8Rn9ggbmZmbTTSRWUV0HcG1zzg9lL8vHQW2Azg+bR7bA0w\nU9L4dIB+JrAmLXtR0ox01td5pVxmZtYm+9aVWNJNwCnA4ZJ6KM7i+jpwi6QLgV8C56Tmq4EzgG7g\nJeACgIjYJekKYF1qd3lE9B38/zzFGWYHAneml5mZtVFtRSUizh1g0WkN2gawYIA8S4GlDeJdwHHD\nGaOZmeX1ZjlQb2ZmbwEuKmZmlo2LipmZZeOiYmZm2biomJlZNi4qZmaWjYuKmZllU9t1KjZ6rLw+\nz1MDzr7gx1nymNno5S0VMzPLxkXFzMyycVExM7NsXFTMzCwbFxUzM8vGRcXMzLJxUTEzs2xcVMzM\nLBsXFTMzy6YtRUXS05I2SdogqSvFDpW0VtLW9D4+xSXpakndkjZKOqGUZ15qv1XSvHasi5mZva6d\nWyofjohpEdGZ5hcCd0XEFOCuNA9wOjAlveYDi6EoQhTPvT8JmA5c1leIzMysPd5Mu7/mAMvS9DLg\nrFJ8eRTuB8ZJOgqYBayNiF0RsRtYC+S5iZWZmbWkXUUlgJ9IWi9pfoodGRE7ANL7ESk+AdhW6tuT\nYgPFzcysTdp1l+KTI2K7pCOAtZIeH6StGsRikPgbExSFaz7A0Ucf3exYzcysorZsqUTE9vS+E7iN\n4pjIM2m3Ful9Z2reA0wqdZ8IbB8k3ujzlkREZ0R0dnR05FwVMzMrGfGiIumdkt7VNw3MBB4FVgF9\nZ3DNA25P06uA89JZYDOA59PusTXATEnj0wH6mSlmZmZt0o7dX0cCt0nq+/x/jIgfS1oH3CLpQuCX\nwDmp/WrgDKAbeAm4ACAidkm6AliX2l0eEbtGbjXMzKy/ES8qEfEk8MEG8V8DpzWIB7BggFxLgaW5\nx2hmZq15M51SbGZmo5yLipmZZeOiYmZm2biomJlZNi4qZmaWjYuKmZll46JiZmbZuKiYmVk2Lipm\nZpaNi4qZmWXjomJmZtm4qJiZWTYuKmZmlo2LipmZZeOiYmZm2biomJlZNi4qZmaWTTseJ5yVpNnA\nN4ExwHcj4uttHlJt7lvysWHn+ND8OzKMpP0uuG32sHNc//EfZxiJmZWN6i0VSWOAa4DTganAuZKm\ntndUZmZvX6N9S2U60J2ee4+kFcAc4LGhOvYu/n6WAXR8/lNZ8rwVXX3jrCx5/vKTa7LkMbP6jfai\nMgHYVprvAU5q01jsLeCM276WJc/qj/+X/2/+o7d+N0ve//GJz2TJY1YXRUS7x9AySecAsyLiM2n+\n08D0iPhCv3bzgflp9r3AExU/4nDg2UzDHYm8deZ23vpzj7a8deYebXnrzP1myfvvIqJjqEajfUul\nB5hUmp8IbO/fKCKWAEuaTS6pKyI6Wx/eyOatM7fz1p97tOWtM/doy1tn7tGWd1QfqAfWAVMkHSNp\nLDAXWNXmMZmZvW2N6i2ViNgr6SJgDcUpxUsjYnObh2Vm9rY1qosKQESsBlbXlL7pXWZtzltnbuet\nP/doy1tn7tGWt87coyrvqD5Qb2Zmby6j/ZiKmZm9ibioUNzqRdITkrolLWywfH9JN6flD0iaXCHn\nUkk7JT06wHJJujrl3CjphCbGO1TuUyQ9L2lDen25Yt5Jku6RtEXSZklfzDHuinmbHrOkAyQ9KOmR\nlPerDdo0/bNrIvf5knpLY658EYmkMZIelvSG++a0OuYKeYcz3qclbUr9uhosb+n7XCFvq9/lcZJW\nSno8fe8+lGm8Q+VtdbzvLfXZIOkFSRe3MuZGfx8kHSppraSt6X38AH3npTZbJc2rMvY3iIi39Yvi\nAP/PgfcAY4FHgKn92vwH4B/S
9Fzg5gp5/xg4AXh0gOVnAHcCAmYADzQx5qFynwLc0cK/xVHACWn6\nXcA/N/i3aHrcFfM2PeY0hoPS9H7AA8CM4f7smsh9PvDtFr93XwL+sdE6tzrmCnmHM96ngcMHWd7S\n97lC3la/y8uAz6TpscC4TOMdKm9L4+2XYwzwK4rrQpoec6O/D8B/BRam6YXAlQ36HQo8md7Hp+nx\nzY7fWyqlW71ExMtA361eyuZQfJkAVgKnSdJgSSPip8CuQZrMAZZH4X5gnKSjqgy4Qu6WRMSOiHgo\nTb8IbKG4a0FZ0+OumLeV8UZE/CbN7pde/Q8SNv2zayJ3SyRNBD4KDHSZfUtjrpC3Ti1/n3OTdDDF\nH9brACLi5Yh4rl+zpsdbMW8OpwE/j4hftDLmAf4+lL9Ty4CzGnzuLGBtROyKiN3AWqDpO7e6qDS+\n1Uv/P3ivtYmIvcDzwGEj8LnD8aG06+ZOScc22zntcjme4n/oZcMa9yB5oYUxp909G4CdFL8QA463\n2Z9dhdwAn0i7IlZKmtRgeSN/D/w18OoAy1sd81B5Wx0vFAX1J5LWq7hDxYBjTqp+L4bKC81/L94D\n9ALXp12B35X0zgzjrZK3lfH2Nxe4qUF8OL97R0bEDij+kwcckTn/a1xUik3J/vr/j7RKmzo+t1UP\nUWw6fxD4FvDDZjpLOgi4Fbg4Il7ov7hBl0rjHiJvS2OOiFciYhrF3RSmSzou13gr5P4RMDkiPgD8\nT17/n+CAJH0M2BkR6wdr1uyYK+ZterwlJ0fECRR3BF8g6Y+HO+aKeVv5XuxLsftncUQcD/yWYpfP\ncMdbJe9wf/fGAmcC/9RocQtjburjc+R3Ual2q5fX2kjaFziE4e9+qnSLmVZExAt9u26iuI5nP0mH\nV+kraT+KP/w3RsQPGjRpadxD5R3OmFOf54B7eePm+rB/dgPljohfR8SeNPsd4MQK6U4GzpT0NMWu\n1lMl9b9lditjHjJvi+Pt67s9ve8EbqPYbdxwzEml78VQeVv8XvQAPaUty5UUxWC44x0y73C/xxTF\n9aGIeGaAz2/1b8YzfbvK0vvOzPlf46JS7VYvq4C+MyHOBu6OdGRrGFYB56UzOmYAz/dtng6XpN/p\n2wcvaTrFz/nXFfqJYn/xloj4Rq5xV8nbypgldUgal6YPBD4CPN5gvE3/7Krk7rc/+0yKY0WDiohL\nI2JiREym+K7dHRH9n5/Q9Jir5G1lvKnfOyW9q28amAn0P/Owle/FkHlb+V5ExK+AbZLem0Kn8cbH\nYTQ93ip5W/3dKzmXxru+Whpzv75936l5wO0N2qwBZkoar+LssJkp1pwYxlkKb5UXxVkV/0xxFth/\nTrHLgTPT9AEUm6PdwIPAeyrkvAnYAfxfiv8BXAh8DvhcWi6KB4z9HNgEdDYx3qFyXwRspjiT7X7g\nDyvm/SOKzd2NwIb0OmO4466Yt+kxAx8AHk55HwW+nONn10Tuvy2N+R7gfU1+704hnSmUY8wV8rY0\nXopjCY+k12Ze/x0Z7veiSt5Wv8vTgK708/shxdlMw/79q5C3pfGmvu+gKECHlGJNj5nGfx8OA+4C\ntqb3Q1PbToon5vb1/Yv0vesGLmjme9f38hX1ZmaWjXd/mZlZNi4qZmaWjYuKmZll46JiZmbZuKiY\nmVk2LipmQ5A0WQPcEXqA9udLendp/ukmL4AzG7VcVMzyOx9491CNytKV82ajnouKWTX7SlpWuhnj\nOyR9WdI6SY9KWpKudD6b4oKyG1U8F+PA1P8Lkh5S8eyQ9wFI+krq9xNguYpnuFyf2jws6cOp3UDx\n8yX9UNKPJD0l6SJJX0pt7pd0aGr3l5IeS2NfMfL/dPZ24qJiVs17gSVR3IzxBYpnnnw7Iv4gIo4D\nDgQ+FhErKa64/mRETIuIf039n43ixomLgf9YynsiMCci/hxYABAR/57idh3LJB0wSBzgOODPKe
6Z\ntQh4KYqbHd4HnJfaLASOT2P/XNZ/FbN+XFTMqtkWEf87TX+f4tYzH1bxZMZNwKnAYLc577uJ5npg\ncim+qlR4/gj4HkBEPA78Avj9QeIA90TEixHRS3GL/B+l+KbS52yk2HL6FLC3iXU2a5qLilk1/e9n\nFMC1wNlpC+I7FPfsGkjf3YFfobiFep/flqYHehDXYA/o2lOafrU0/2rpcz5Kcc+oE4H1Pn5jdXJR\nMavmaL3+PPJzgZ+l6WdVPCfm7FLbFykem9ysnwKfBJD0+8DRwBODxIckaR9gUkTcQ/EAr3HAQS2M\nzawS/4/FrJotwDxJ/53iTq+LKe5Qu4niOevrSm1vAP5B0r8CH6K6a1O/TRS7qc6PiD2SBopXyTkG\n+L6kQyi2eK6Keh6BawbguxSbmVk+3v1lZmbZuKiYmVk2LipmZpaNi4qZmWXjomJmZtm4qJiZWTYu\nKmZmlo2LipmZZfP/AOyWje7G01rZAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x8a696fd0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(train_data.bathrooms);\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 402,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,1,u'bathrooms')"
      ]
     },
     "execution_count": 402,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAEICAYAAACgQWTXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X2UXHWd5/H3tyvVUEmkgU5EBJI0\nKz7gtIKpWXVwXMfGkaeYmJ0zixsn+LAnTtQdHd2dA8MchTmb3VnXMxt8Gu2jIFHGh1XGEZVR6BnH\nWcdBOzwYkGFhSIIxKNEMjUJIOp3v/lG3OlXV9XCfqurW7c/rnJyuvnXr93jvtyu37rd+5u6IiEg+\nDPW7ASIikh4FdRGRHFFQFxHJEQV1EZEcUVAXEckRBXURkRxRUJfMMLM9ZnZhSmW5mT0njbJEBomC\nugw8M/u2mf2nfrdDJAsU1GXRM7Ml/W6DSFoU1CVrft3MfmRm/2pmN5jZiWZ2ipl9zcwOBNu/ZmZn\nApjZNuA3gY+Y2a/M7CM1ZV1oZg8Gr/momVnwmjeZ2XfN7H+b2UHgGjMbMrM/MbO9ZvaYme0ws5Fq\nQWb2OjO7z8weD/5n8IKa5/aY2X81sx+a2ZNm9ikzO83MbjWzX5rZ7WZ2SrDviWb2WTP7RVDWD8zs\ntF4MrCwOCuqSNZuA1wL/Bngu8CdUjtMbgNXAKuAQ8BEAd78a+Afgne6+3N3fWVPWZcCvAy8Gfjco\nt+qlwMPAM4FtwJuCf78FnA0sr9ZhZs8FPge8G1gJfAO4xcyGa8r798BrgjavA24F/hhYEbT/D4L9\nrgBGgLOAUeD3g/6IpEJBXbLmI+7+Y3c/SCXYvsHdf+HuX3b3p9z9l8H2fxeirD9z98fd/RHg74Dz\nap7b7+4fdvej7n6Iyh+TP3f3h939V8BVwOXBpZn/AHzd3W9z91ngg0AJ+I2a8j7s7j9z959Q+SNz\nh7vf5e6Hgb8Czg/2m6USzJ/j7nPuvtPdn4g1UiJNKKhL1vy45vFe4NlmttTMPhFcGnkC+A5wspkV\nOpT105rHT1F5992sHoBnB/XV1r0EOK3xOXc/Frz+jJr9f1bz+FCT36t1fwb4JvB5M9tvZh8ws2KH\nfoiEpqAuWXNWzeNVwH7gvcDzgJe6+0nAK4PnLfgZ56tGG1+zn8rlndq6j1IJznXPBdfmzwJ+ErlS\n91l3v9bdz6XyTv8yYHPUckRaUVCXrHmHmZ1pZqdSuSb9BeAZVN7tPh5sf3/Da35G5Tp4Ep8D/tDM\nxsxsOfDfgS+4+1Hgi8ClZjYRvKt+L3AY+MeolZjZb5nZePC/jCeoXI6ZS9h2kXkK6pI1fwl8i8qH\nmA8D/w3YTuUa9s+BfwL+puE11wG/E9zl8qGY9V5P5dLId4DdwNPAfwZw9weANwIfDtqwDljn7kdi\n1PMs4EtUAvr9wN8Dn43ZZpEFTItkiIjkh96pi4jkiIK6iEiOKKiLiOSIgrqISI709IuMVqxY4WvW\nrOlllSIiA2/nzp0/d/eVYfbtaVBfs2YN09PTvaxSRGTgmdnezntV6PKLiEiOKKiLiOSIgrqISI4o\nqIuI5IiCuohIjnQM6mZ2fbC817012041s9uCpcJuqy7VJSIi/RXmnfqngYsatl0JTLn7OcBU8Hsm\n7LppF9vXbOfaoWvZvmY7u27a1e8miYj0TMeg7u7fAQ42bF4P3Bg8vhHYkHK7Ytl10y5u2XILM3tn\nwGFm7wy3bLlFgV1EFo2419RPc/dHAYKfz0yvSfFNXT3F7FOzddtmn5pl6uqpPrVIRKS3uv5BqZlt\nMbNpM5s+cOBAV+uaeWQm0nYRkbyJG9R/ZmanAwQ/H2u1o7tPunvZ3csrV4b66oLYRlaNRNouIpI3\ncYP6V4ErgsdXAH+dTnOSmdg2QXFp/c
LsxaVFJrZN9KlFIiK9FeaWxs8B3wOeZ2b7zOytwJ8BrzGz\nB4HXBL/33fimcdZNrmNk9QgYjKweYd3kOsY3jfe7aSIiPdHTNUrL5bLrWxpFRKIxs53uXg6zrzJK\nRURyREFdRCRHFNRFRHJEQV1EJEcU1EVEckRBXUQkRxTURURyREFdRCRHFNRFRHJEQV1EJEcU1EVE\nckRBXUQkRxTURURyREFdRCRHFNRFRHJEQV1EJEcU1EVEckRBXUQkRxTURURyREFdRCRHFNRFRHJE\nQV1EJEcU1EVEckRBXUQkRxTURURyREFdRCRHFNRFRHJEQV1EJEcU1EVEckRBXUQkRxTURURyJFFQ\nN7M/NLP7zOxeM/ucmZ2YVsNERCS62EHdzM4A/gAou/uvAQXg8rQaJiIi0SW9/LIEKJnZEmApsD95\nk0REJK7YQd3dfwJ8EHgEeBSYcfdvNe5nZlvMbNrMpg8cOBC/pSIi0lGSyy+nAOuBMeDZwDIze2Pj\nfu4+6e5ldy+vXLkyfktFRKSjJJdfLgR2u/sBd58FbgZ+I51miYhIHEmC+iPAy8xsqZkZMAHcn06z\nREQkjiTX1O8AvgTcCewKyppMqV0iIhLDkiQvdvf3A+9PqS0iIpKQMkpFRHJEQV1EJEcU1EVEckRB\nXUQkRxTURURyREFdRCRHFNRFRHJEQV1EJEcU1EVEckRBXUQkRxTURURyJNF3v4iI9Muum3YxdfUU\nM4/MMLJqhIltE4xvGu93s/pOQV1EBs6um3Zxy5ZbmH1qFoCZvTPcsuUWgEUf2HX5RUQGztTVU/MB\nvWr2qVmmrp7qU4uyQ0FdRAbOzCMzkbYvJgrqIjJwRlaNRNq+mCioi8jAmdg2QXFpsW5bcWmRiW0T\nfWpRduiDUhEZONUPQ3X3y0IK6iIykMY3jSuIN6HLLyIiOaKgLiKSI7r8Iokps08kOxTUJRFl9olk\niy6/SCLK7BPJFgV1SUSZfSLZoqAuiSizTyRbFNQlEWX2iWSLPiiVRJTZJ5ItCuqSmDL7RLJDl19E\nRHIk0Tt1MzsZ+CTwa4ADb3H376XRMGltkJN9Brnt0h1ZOCay0Ia0JL38ch3wN+7+O2Y2DCxNoU3S\nxiAn+wxy26U7snBMZKENaYp9+cXMTgJeCXwKwN2PuPvjaTVMmhvkZJ9Bbrt0RxaOiSy0IU1Jrqmf\nDRwAbjCzu8zsk2a2rHEnM9tiZtNmNn3gwIEE1QkMdrLPILdduiMLx0QW2pCmJEF9CfAS4C/c/Xzg\nSeDKxp3cfdLdy+5eXrlyZYLqBAY72WeQ2y7dkYVjIgttSFOSoL4P2OfudwS/f4lKkJcuGuRkn0Fu\nu3RHFo6JLLQhTbGDurv/FPixmT0v2DQB/CiVVklL45vGWTe5jpHVI2AwsnqEdZPrBuIDnUFuu3RH\nFo6JLLQhTebu8V9sdh6VWxqHgYeBN7v7v7bav1wu+/T0dOz6REQWIzPb6e7lMPsmuqXR3e8GQlUk\nIiLdp4xSEZEc0Xe/SGYNcpZfltue5bYNknbj2M8xVlCXTBrkLL8stz3LbRsk7cYR6OsY6/KLZNIg\nZ/llue1ZbtsgaTeO/R5jvVOXTBrkLL8stz3LbRskccaxV2Osd+qSSYOc5Zfltme5bYOk3Tj2e4wV\n1CWTBjnLL8ttz3LbBkm7cez3GOvyi2TSIC+Tl+W2Z7ltgyTMOPZrjBNllEaljFIRkeiiZJTq8ouI\nSI4oqIuI5MiiuKbe6+yutOsLU96gZgnOt3vvDFYwfM4pjZYAOHTwUN/mC+qviZ5zyTk8+I0Huza+\nSeYvi3Nf26bSqSWOPn2U2SeP37tdGi1x8XUXM75pfGDOz8bXdfuYiCv319QbM7+g8kl0t75aM+36\nwpTX6z6mpVm7m+n1fA0VhzAz5o7M9aRNSeYvi3Mfdl4LwwXOf+v53HPjPZk/P8P0qZvt1jX1Gr3O\n7k
q7vjDl9TuDLa5m7W6m1/N1bPZY24CedpuSzF8W5z7svM4dmWPn5M6BOD/D9Knf416V+6De6wy6\ntOsLU96gZglGaV+v56vbrw1TTpjyszj3Uer2ueZXCrJ2foZtTxbOudwH9V5nd6VdX5jy+p3BFleU\n9vV6vrr92jDlhCk/i3MfpW4rWOIyoog7XmHbk4VzLvdBvdfZXWnXF6a8fmewxdWs3c30er6GikMU\nhgs9a1OS+cvi3Ied18JwgbVb1g7E+RmmT/0e96rCNddc07PKJicnr9myZUvP6gM47UWncfKak9m/\ncz+HnzjMyOoRLtp+Udc+REq7vjDl9bqPaalr98zhyrs2r9wZUVxa5OjTR/syX5d86BKev/75ddvG\n/+M4Tx54sivjm2T+sjj3jW0qjZawIePY7LH5fUqjJS77+GW84qpXDMT52ex13TwmGl177bWPXnPN\nNZNh9s393S8iIoNOd7+IiCxSCuoiIjkykBmlvchA6+X6g2Ez1eJmtMVpb6ssy1vfdSuHfnEIqM8K\njNPPJOMWZSw61RspY7cm83VkdZO5afF8kjFptU8WM0mrwrYtyXmWtP/dLr9fBu6aei8y6NrVAXQ9\nY7RRcWmRF1/x4gWZd832a2xHnPFqlWXpx3zBfcWF4QLrr1/f08zHKNl90H6+4mbs1u7bbm7a9TFJ\ntnCzOvudSVoVdq6TnGdJj6dOr89apm6Ua+oDF9S3r9nOzN6FN/iPrB7h3XvenajsMHUAqdbfqq5G\n1Xd/nTS2I854hW1TmLKStCNqWc3KhvbzFaZdnerrNDet+pik7lZ1pnkexBV2rpOcZ0mPp06v70Wc\niSJKUB+4yy+9yKDr5fqDYV8XJqA3K68Xfel15mMa2X3V55Jk7FZ1mpuofQ9Td68zMaMI298kx2bS\n46nT67OYqRvWwH1Q2osMul6uPxj2da0y7zqVF6e9UfvS68zHKNl9nepNkrFb1WluovY9TN29zsSM\nImx/k5xnSY+nbpffTwMX1HuRQdfL9QfDZqo1y7xrtl9jO+K0t1WWZbNAUhgu9DzzMUp2X6d642bs\n1u7bbm7a9TFJtnCvMzGjCDvXSc6zpMdTt8vvp4HLKO1FBl27OnqRMdosU61Z5l2YjLY47W2VZfmC\n17+APX+/h6OHjgLHswJ7nfkYJbuvU72RM3ZrMl+bzk2T51v1MUm2cK8zMaMIO9dJzrOkx1O3y0+b\nMkpFRHJEGaUiIouUgrqISI4kvqXRzArANPATd78seZNaa5lFWHs/qVU+vJs73H7lmtr1MGvXTyyN\nlnjh775wvtx29yAXTigwtGSo5dqL7dqeKMNu7wwYEDTLhgw/5nXb6voRrBMJzGeDVsdqeNkwR351\nZMFrq334+tu/zs7JnS3HoFV2ZW3maXFZ5QOnxjG+74v3tcxObSyjVnFZkSUnLuHQLw7N198qi3PH\nhTvYPbW7adur9UJlPdTGtTSrbbrrhrtallEd+8Z1VU99zqns+faeyrhZsF/jGFbHvMm83fmpOzl2\n5BiRdTj+h4aHmpYbNg+itt2dMmrbZt7WHpNNxn6+qprxbTY3QLis0Npzucm5UxotcfiXh+vGpnBC\nk3GsnjNPHql8k+iho/PnXnV74/xbwVi7ZS2XfuzScOObUOJr6mb2HqAMnNQpqCe5ph523cMsaMyy\n7FaGXbf7sOo3V7UNiI2qmY53fvLOuq9ZjVLn+uvXA/CVN38lVhnVdqybXNc2GEs6omQ7h9kvCisY\nQ4WhuqUHO2WF9lN5azl2YO9ZRqmZnQncCGwD3tPNoB41y7HfwmQFppFhlzWR3vE1kVZfR1aPDMR4\n5UHYOU96bITVKSu0X6xgvO/o++K9tocZpduBPwKe0aYxW4AtAKtWrYpd0SBkctWKs4boIGexVSU9\nabu97qekL+yc9yKgQ+es0H7pVf9jf1BqZpcBj7n7znb7ufuku5fd
vbxy5cq41Q1EJletOGuIxsmw\ny5qwma+tpNXXQRmvPAg750mPjbA6ZYX2S6/6n+TulwuA15nZHuDzwKvN7LOptKqJsOseZkFjlmW3\nMuy6qTBcYGxiLNJrqpmOQ8V4h1V13Ca2TcQuo9qOiW0Tkdsv0UXJdg6zXxRWsAVryXbKCu2ntVvW\n9qSe2GeOu1/l7me6+xrgcuBv3f2NqbWswfimcdZNrqtcc7XKdbPy1vL8Ndh5VvnUupPqX83SaGn+\n7ozq77XltvvrWjihUPfa6usbv4q2WdubfYVnu/3qngv6Od+XIVuwra4fVvm9eofG8RfC8PLhpq9d\nf/16Nt++mfLWctsxqD5XbeulH7uUDTdsqKuruKzYdIxr96kdt/FN4wvKqFVcVpx/rlp/YzvGN42z\n+fbNHQP7/LhY82Nh42c3ti2jOva15YysHmFsYuz4uFmL48gaftaMzdBwzFOzw/HfqtxI7yKDXWvn\nvPbYbDUndftZ+7Gfr2qo9Xn6+htfz/rr17c8rxrPmdr5aFZ+49g0HcfqOWOV47D23Ktub5x/K1ii\nD0mjSiWj1MxeBfyXbn5QKiKyWPX8q3fd/dvAt9MoS0RE4lNGqYhIjmR+kYzGzMKwGZth1+9sWkaH\ndSabZsXVZDeOrK7Pdu2U9Rh2ncbaDLbGLLXGNtVm3zXLwGyXSVmnJuu0sf2wMJuvcVvSdVRrM1qt\nYKx51Rp+evdPFxwP8/U2zF1t/bUZjI1taxzbNa9aw8GHDoY6fqKsZRlm38Y+t5rnKOvZhmpTQ6Zy\nrdrzLo3s6Faane+1mdFpr9Eadf3XVjGi3XHW67VNM/0tjbtu2tU0szBMxmajOOtyNnttWllqYdZi\nbPZco/LWMqsuWNXzzLmh4hBmVpfN12xboyiZtGe+/MxQGaHNMgvT1ur4ibKWZZh9v/72rzP9FwvP\nkbDzHHX90ijHc2G4wPlvPT9U2XHXxg2TSZzWGq1R13+NmxGbxtqmuVmjtF1GWJQ1JJu9Jkpdta9N\nM0ut01qMrZ6rZQXjpDNPylTmXCdhM2mzptnxE2UtyzD7/umSP22apBJlnqOsXxp17MOW3e21cdNY\nozXq+q9JMmKTrm2amzVKw66jmeaalZ2e78VaqFHq8TnPXOZcJ2EzabOmWTujZAGH2bdV0Igyz1HW\nL4069mHL7vbauGms0Rp1/dckGaG9PMYz/UFp2HU0k65LGKaMbmSpdVqLMUxdVrDMZc51EjaTNmua\ntTPKWpZh9m11v3iUeY6yfmnUsQ9bdrfXxk1jjdao678myQjt5TGe6aDeKrMwTMZmozjrcjZ7bVpZ\namHWYgxT19ota/uSOTdUHFqQzddsW6MombRhM0KbZRamrdXxE2UtyzD7tso6DDvPUdcvjXLsFIYL\nocuOuzZumEzitNZojbr+a9yM2F6vbZrpNUpPe9FpnHr2qR3XxYyyZmW7usKsM9lYV2m0NH/nRO1r\n5uuvKatZmVHWaSwuK1b+CxiUVf79SpZaszbZkM1/4FRcVuSEk06oa+N8u58+umD/OkGm3NyRubr2\nX/KhS3j++ucvWMe0cVuSdVR/+4O/zZOPPcmjdz063+exV49x+InDdcfDuk+sO15vw9zV1l/b58bn\nGsd27NVj+DHvePxEWcsyzL7PvfS5C/rcap6jrGcbqk0zh+uyLWtVz7uwZcddG7fZ+X7em85L1Md2\n9UVZ/7XdWrTtjrM01jbVGqUiIjmiNUpFRBYpBXURkRzJ9C2N0JDtJiKLW4uM10ExNjHG5ts3d7WO\nTL9Tr2Z8KaCLCDDQAR1g99Rudly4o6t1ZDqoT109lZlFY0VE0tDtxdAzHdQHJdNQRCQrMh3UByXT\nUEQkKzId1LO2xqCISFLdXjs300F9wbqcIrK4xf/6lUzoxd0vmb+lsboQsYiIdJbpd+oiIhJN5t+p\nQ8il2g4eqluSDKt8q9zc4cpq
OLXLnn3lLV/h2JH2q6u007jEWLWNt7ztlrol4YaXD9d9+VizpbAi\nJ1NE3D/JF/vHUf3vZeOyZK0MLx/mRb/3oqbLyjWqjvuqC1a1PB7g+Fw/8t1H5peFqytnyOaPkWZj\naUPG2rdV5jdM8ltxWZG52bkFx1ThhOPHX1zDyytLCfZKdSm/ff+0b8Gx3Mt2ZF1tDOh0rFePt2ZL\nY3albVn/Qq+0lo+D9ANceWt5flJv3nwzNPk7MbRkiA2f3gB0XpouL1acu4KDDx7suCxZXENLhjh2\ntEPZKWQejk2Mse97+xbFnEk8YxNj7P3O3tDHetyl7XKznB1ke6kzKxjvO/q+UEvhQeel6UQk/+Is\nbZeb5ewg2wlI1Xf9YZfCExHpdjzI/AelWU5Aqi5vFWYpvCz3Q0R6p9uxIPNBPc0EpCRrDDZTXXps\nYttEy5EcWjIUemm6vFhx7opQy5LFNbQkRNkpTPXYxNiimTOJZ2xiLNKx3oul7TIf1OsSkCxYhm3Z\n8ROtuKxIabQEVnlsQ8HZbJW7D6pKoyVef+Pr2fjZjQwNJ+u2FWz+Q9JqGzfu2FjXLqjcMbDh0xvm\n77WvTaSa/wMTNfhE3D/tP2SdjE2M8Y773sGGGzZU5qWD4eXDlLeW5+e3bg4bVMd9w6c3tDweoLJt\n42c2Ut5abtr/2mOkaT1DlXo23745VPJbcVmx6TFVe/zFNbx8OHEZUVjBKn/MmhzLclz1WNx8++aO\nx3r1eBtZPRLrQ9LIbcv6B6UiIoudlrMTEVmkFNRFRHIk9i2NZnYWsAN4FpW0m0l3vy6thlVda9em\nXaSISN+839/f1fKTvFM/CrzX3V8AvAx4h5mdm06zKhTQRSRvuh3XYgd1d3/U3e8MHv8SuB84I62G\niYhIdKlcUzezNcD5wB1NnttiZtNmNn3gwIE0qhMRkRYSB3UzWw58GXi3uz/R+Ly7T7p72d3LK1eu\nTFqdiIi0kSiom1mRSkC/yd1vTqdJIiISV+ygbmYGfAq4393/PL0mHdftT4lFRHqt23Etybc0XgD8\nHrDLzO4Otv2xu38jebOOU2AXEQkvdlB39//LwC8DKyKSL8ooFRHJkcwvkrHrpl2J1xSNy4qGz/bu\nC89EJN+qa/h2U6bfqe+6aRc3v/HmvgR0QAFdRFK1e2o3Oy7c0dU6Mh3Up66e6ncTRERStXtqd1fL\nz3RQ19qeIiLRZDqoa11PEZFoMh3Uu72Wn4hIr41NjHW1/EwH9fFN46msKRqXFXUbvoikpxd3v2T+\nlsbqos0iItJZpt+pi4hINArqIiI5kvnLL1rSTkTyJMtrlHadArqI5E1m1ygVEZHsUVAXEckRBXUR\nkRxRUBcRyZFMB3UtZScieZPlNUp7QoFdRCS8TL9TFxGRaBTURURyREFdRCRHFNRFRHJEQV1EJEcU\n1EVEckRBXUQkRxTURURyREFdRCRHFNRFRHJEQV1EJEcSBXUzu8jMHjCzh8zsyrQaJSIi8cT+Qi8z\nKwAfBV4D7AN+YGZfdfcfpdW4Wjsu3MHuqd2xX18aLXH06aPMPjmbYqsqisuKAJ3LNsBrfjY+PWSs\nfdtaVl2wilvfdSuHfnEIgKHhIY4dORapTaXREod/eTjU60qjJS6+7mIe+e4jTH98umnbohhePswZ\nLz2D3X+7+3hZQZ9LoyWASt9ajANUxsKPOSOrRzjnknO4Z8c9qcxd4cQCc0/PJS4nrGZzV1xWZMmJ\nS+bnNzSD4tJionGYH/+DhxhZFW5sh5cPc+RXR9qWu+LcFcw+OcvM3plI7SkuKzI3O1c/Rs2OiwLQ\nMG1xzotWSqMlnnXes+qP2Tbm5zAYx4ltE4xvGmfXTbvqzt3GOi6+7mLGN42n0uZWzD3eGWxmLweu\ncffXBr9fBeDu/6PVa8rlsk9PT0euK2lAHzhtgl2u6hTJieLSIi++4sXc+ck7OTbb+g9NYbjA+u
vX\nRw7sZrbT3cth9k1y+eUM4Mc1v+8LtqVuUQV06E9wVUAXiW32qVl2Tu5sG9AB5o7MMXX1VFfbkiSo\nW5NtC0KDmW0xs2kzmz5w4ECC6kREssvnwr0zmnkk2iWqqJIE9X3AWTW/nwnsb9zJ3Sfdvezu5ZUr\nVyaoTkQku6zQ7H3uQiOrRrrajiRB/QfAOWY2ZmbDwOXAV9NpVr2xibFuFJtd4Y6Nwa9TJCeKS4us\n3bKWoWL7kFoYLjCxbaKrbYkd1N39KPBO4JvA/cAX3f2+tBpWa/PtmxMH9tJoaf4ulbQVlxXDlW0N\nPxufHjLKW8ts/MzG+bsUoPIpf1Sl0VLo15VGS2z8zEbKW8upBPfh5cOV+aoty47XNd+3NnXZUOXJ\nkdUjlLeWU5u7womFVMoJq9kcFJcV6+Y3NCPxOMyPv4Uf2+Hlwx3LXXHuCkZWR38HWlxWXDhGzY6L\nJtMW57xopTRaWnjMtjE/h8E4rptcx6Ufu5QNN2xoObel0VKsD0mjin33Sxxx734REVnMenX3i4iI\nZIyCuohIjiioi4jkiIK6iEiOKKiLiORIT+9+MbMDwN6YL18B/DzF5gyKxdjvxdhnUL8Xk6h9Xu3u\nobI3exrUkzCz6bC39OTJYuz3YuwzqN/9bkcvdbPPuvwiIpIjCuoiIjkySEF9st8N6JPF2O/F2GdQ\nvxeTrvV5YK6pi4hIZ4P0Tl1ERDpQUBcRyZGBCOpmdpGZPWBmD5nZlf1uT1Rmdr2ZPWZm99ZsO9XM\nbjOzB4OfpwTbzcw+FPT1h2b2kprXXBHs/6CZXVGzfa2Z7Qpe8yEz6/u3o5vZWWb2d2Z2v5ndZ2bv\nCrbnvd8nmtn3zeyeoN/XBtvHzOyOoA9fCNYgwMxOCH5/KHh+TU1ZVwXbHzCz19Zsz+z5YGYFM7vL\nzL4W/J77fpvZnuA4vNvMpoNt/TvO3T3T/6h8k/K/AGcDw8A9wLn9blfEPrwSeAlwb822DwBXBo+v\nBP5n8PgS4FYq3+z8MuCOYPupwMPBz1OCx6cEz30feHnwmluBizPQ59OBlwSPnwH8P+DcRdBvA5YH\nj4vAHUF/vghcHmz/OLA1ePx24OPB48uBLwSPzw2O9ROAseAcKGT9fADeA/wl8LXg99z3G9gDrGjY\n1rfjvO8DEmLAXg58s+b3q4Cr+t2uGP1YQ31QfwA4PXh8OvBA8PgTwBsa9wPeAHyiZvsngm2nA/9c\ns71uv6z8A/4aeM1i6jewFLgTeCmV7MElwfb5Y5rKIjMvDx4vCfazxuO8ul+WzwcqS1pOAa8Gvhb0\nYzH0ew8Lg3rfjvNBuPxyBvDjmt/3BdsG3Wnu/ihA8POZwfZW/W23fV+T7ZkR/Nf6fCrvWnPf7+AS\nxN3AY8BtVN5hPu6V1cKgvq0Ylz6KAAACPUlEQVTz/QuenwFGiT4eWbAd+CPgWPD7KIuj3w58y8x2\nmtmWYFvfjvMlMTvRS82uH+X5PsxW/Y26PRPMbDnwZeDd7v5Em8uBuem3u88B55nZycBfAS9otlvw\nM2r/mr0R63u/zewy4DF332lmr6pubrJrrvoduMDd95vZM4HbzOyf2+zb9eN8EN6p7wPOqvn9TGB/\nn9qSpp+Z2ekAwc/Hgu2t+ttu+5lNtvedmRWpBPSb3P3mYHPu+13l7o8D36Zy7fRkM6u+iapt63z/\ngudHgINEH49+uwB4nZntAT5P5RLMdvLfb9x9f/DzMSp/xP8t/TzO+309KsT1qiVUPjQY4/gHJC/s\nd7ti9GMN9dfU/xf1H6R8IHh8KfUfpHw/2H4qsJvKhyinBI9PDZ77QbBv9YOUSzLQXwN2ANsbtue9\n3yuBk4PHJeAfgMuA/0P9B4ZvDx6/g/oPDL8YPH4h9R8YPkzlw8LMnw/Aqzj+QWmu+w0sA55R8/gf\ngYv6eZz3/QAIOXCXULl74l+Aq/vdnhjt/xzwKDBL5S/vW6
lcP5wCHgx+VifQgI8Gfd0FlGvKeQvw\nUPDvzTXby8C9wWs+QpAp3Oc+v4LKfxN/CNwd/LtkEfT7RcBdQb/vBd4XbD+byl0MDwWB7oRg+4nB\n7w8Fz59dU9bVQd8eoOaOh6yfD9QH9Vz3O+jfPcG/+6rt6udxrq8JEBHJkUG4pi4iIiEpqIuI5IiC\nuohIjiioi4jkiIK6iEiOKKiLiOSIgrqISI78f/jOjSz5m4CBAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xfc074ac8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(range(train_data.shape[0]), train_data[\"bathrooms\"].values,color='purple')\n",
    "plt.title(\"bathrooms\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 403,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only listings with fewer than 6 bathrooms (drops rows with 6 or more).\n",
    "# NOTE(review): the same filter is applied to test_data, which removes test rows\n",
    "# outright -- confirm that no prediction is needed for the dropped listings.\n",
    "train_data = train_data.loc[train_data.bathrooms.lt(6)]\n",
    "test_data = test_data.loc[test_data.bathrooms.lt(6)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 404,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,1,u'bedrooms')"
      ]
     },
     "execution_count": 404,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW8AAAEICAYAAACQzXX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGmpJREFUeJzt3X9w3PWd3/HnS+uVkW0isKy7I/yw\nxd3leiYOP7xHSOikFOXuIMaB0Jk7KJekSeeUAp3LtZmm4dzCkIl702vnSjIJCWoCITmHQH7RGIe5\nJEqZu/YyBJmDCEJoDNjGQGKBQUnAwbL97h/7lVhJu9rvyrtafaTXY8aj1Xe/38/3/fn+eO36+93V\nRxGBmZmlpaPdBZiZWeMc3mZmCXJ4m5klyOFtZpYgh7eZWYIc3mZmCXJ4W9tI2i3p7U1o5/OSPtaM\nmsxS4fA2M0uQw9sWNUnL2l2DWSs4vK3dfk/SjyS9KOk2SccBSLpE0kOSXpL0D5LeNLGApLMlPSjp\nF5LuBI6reO4CSfsk/UdJPwVuy6b/qaRdkg5I+qak11cs81ZJD0gay36+teK5+yR9LKvhl5K2S+qR\ntE3Sz7P512XzStL/kLQ/a+uHkt7Y6g1oS5PD29rtKuAPgd8E3gD8J0nnALcCHwB6gFuAb0paLqkT\nuBv4IrAa+ArwL6a1+RvZc2uBAUkXAn8J/BFwErAH+DKApNXADuAT2br+GtghqaeivSuAdwMnZ3V+\nn/KLwmrgMeCGbL4/AN6W9eME4I+BF45p65jV4PC2dvtkRDwdEQeArcCVwJ8Ct0TE/RFxJCJuB14F\nzsv+FYGbImI8Ir4KPDCtzaPADRHxakQcpPwCcWtEPBgRrwLXAW/J3jFvAn4SEV+MiMMRcQfwY2Bz\nRXu3RcQTETEG3As8ERHfjYjDlF88zs7mGweOB/4JoIh4LCKea+K2Mpvk8LZ2e7ri8R7g9ZTfMX8o\nu2TykqSXgFOz514PPBNT/6LanmltjkbEryp+f33lPBHxS8rviE+e/lxFeydX/P6ziscHq/y+Kmv3\ne8AngU8BP5M0KOl1tTpudiwc3tZup1Y8Pg14lnKgb42IEyr+rcjeFT8HnCxJ05arNP1PZT5L+QUB\nAEkrKV8ieWb6cxXtPTOXzkTEJyJiI3AG5csn/2Eu7ZjV4/C2drtW0inZtee/AO4E/ifwbyS9ObsJ\nuFLSJknHU77efBj4M0nLJF0OnFtnHV8C3ifpLEnLgf8C3B8Ru4FvAW+Q9C+z9v4YWA/c02hHJP1e\nVnMReBn4FXCk0XbM8nB4W7t9Cfg28GT272MRMUz5uvcngReBXcC/AoiIQ8Dl2e8vUr4p+PXZVhAR\nQ8B/Br5G+Z37b1K+CUlEvABcAnyI8qWUDwOXRMTzc+jL6yi/8LxI+dLLC8B/n0M7ZnXJgzGYmaXH\n77zNzBLk8DYzS5DD28wsQQ5vM7MEteSP9qxZsybWrVvXiqbNzBalnTt3Ph8RvXnnb0l4r1u3juHh\n4VY0bWa2KEma/k3fWfmyiZlZghzeZmYJcnibmSXI4W1mliCHt5lZgnKFt6R/J+lRSY9IumNiqCoz\nM2uPuh8VlHQy8GfA+og4KOkuyn+R7fMtrq1tRraNMLRliLG9Y3Sf1k3/1n42XLWh3WUtSN5WZu2R\n93Pey4AuSePACsp/wH5RGtk2wvaB7Yy/Mg7A2J4xtg9sB3AoTeNtZdY+dS+bRMQzlP8m8V7Kfwt5\nLCK+3erC2mVoy9BkGE0Yf2WcoS1Dbapo4fK2MmufuuEt6UTgUqCP8nh/KyX9SZX5BiQNSxoeHR1t\nfqXzZGzvWEPTlzJvK7P2yXPD8u3AUxExGhHjlEcteev0mSJiMCJKEVHq7c399fwFp/u07oamL2Xe\nVmbtkye89wLnSVqRDfraDzzW2rLap3
9rP8UVxSnTiiuK9G/tb1NFC5e3lVn75LnmfT/wVeBBYCRb\nZrDFdbXNhqs2sHlwM91ru0HQvbabzYObfQOuCm8rs/ZpyRiWpVIp/FcFzczyk7QzIkp55/c3LM3M\nEuTwNjNLkMPbzCxBDm8zswQ5vM3MEuTwNjNLkMPbzCxBDm8zswQ5vM3MEuTwNjNLkMPbzCxBDm8z\nswQ5vM3MEuTwNjNLkMPbzCxBDm8zswQtqzeDpN8B7qyYdDpwfUTc1LKqchrZNsLQliHG9o7RfVo3\n/Vv7PYqLmS0JdcM7Ih4HzgKQVACeAb7R4rrqGtk2wvaB7Yy/Mg7A2J4xtg9sB3CAm9mi1+hlk37g\niYjY04piGjG0ZWgyuCeMvzLO0JahNlVkZjZ/Gg3vK4A7qj0haUDSsKTh0dHRY6+sjrG9Yw1NNzNb\nTHKHt6RO4J3AV6o9HxGDEVGKiFJvb2+z6qup+7TuhqabmS0mjbzzvhh4MCJ+1qpiGtG/tZ/iiuKU\nacUVRfq39repIjOz+VP3hmWFK6lxyaQdJm5K+tMmZrYUKSLqzyStAJ4GTo+IuheVS6VSDA8PN6E8\nM7OlQdLOiCjlnT/XO++IeAXomXNVZmbWVP6GpZlZghzeZmYJcnibmSXI4W1mliCHt5lZghzeZmYJ\ncnibmSXI4W1mliCHt5lZghzeZmYJcnibmSXI4W1mliCHt5lZghzeZmYJcnibmSXI4W1mlqBcgzFI\nOgH4LPBGIID3R8T3W1lYNSPbRiaHPeta3QXAwQMHJ4dA2/t/97JzcCdxJFBBbBzYyKabN01Zbrbh\n0vLO10idrRierVr7MPuQcJPL7BkDUd6LQHFleRzQ8ZfHJ+dVh9j4gfK2a8SOa3ZMbn8EnSs7OfTy\noSn1VNahgogjQffaxrfR9GPh8K8OT/ahq6eLiz9+cdOHxGtkv1brZ73+ztb+sa677jr3vDZAVuW5\nU622audevW09sm2Eez94LwdfOAi8to+g9nFbq8/NPr8a3bbV+tGO4RfzDoN2O/D3EfHZbBT5FRHx\nUq35WzEM2si2EbYPbGf8lfHqM3QAR2dO7uvvY9/3901ZrriiyObBzTPCbXr71eabS51zaaeR9juK\nHUjiyKEjVddZd9vVULq6lDvAd1yzg+FP197nxRVFznzvmTx8+8NV62hkG+XpT6GzwKW3Xtq0k6qR\n/ZqnvunLztY+0JR151lnpYn9X2++evtuZNsId7/vbo6OTz1BVRAdhY6qx22tPlc7ho7l/Gp0v1br\nR7OOtUaHQat72UTS64C3AZ8DiIhDswV3qwxtGZo9fKoEN8BTQ0/NWG78lXGGtgzVbb/afHOpcy7t\nNNL+0fGjU06A6eusu+1q2Dm4s2nzjr8yzs7BnTXraGQb5enPkUNHmrbNa62zVs156pu+7GztN2vd\nedZZaWKf1puv3r4b2jI0I/AA4kjUPG5r9bnaMXQs51ej27ZaP5p9rOWV57LJ6cAocJukM4GdwAcj\n4uXKmSQNAAMAp512WrPrZGxv3XGPj6m9Wu03ut5mtdNo+7PNO9d1x5H6/ytrZN568+Sts9nzHUtb\n1abPpb65HDdzWXeedU6Y2F95+tNonXNtq9YxNNd93az92ux8yiPPDctlwDnApyPibOBl4CPTZ4qI\nwYgoRUSpt7e3yWVC92ndLW2vVvuNrrdZ7TTa/mzzznXdKqip89abJ2+dzZ7vWNqqNn0u9c3WfjPX\nnWedEyb2V57+zDbPXM6hWsvUOobmuq+btW2bnU955AnvfcC+iLg/+/2rlMN8XvVv7ae4olh7hho9\n6evvm7FccUVx8ibfbO1Xm28udc6lnUba7yh2UOgs1Fxn3W1Xw8aBjU2bt7iiyMaBjTXraGQb5elP\nobPQtG1ea521as5T3/RlZ2u/WevOs85KE/u03nz19l3/1n46ijNPUBVU87it1edqx9CxnF+Nbttq\n/W
j2sZZX3fCOiJ8CT0v6nWxSP/CjllZVxYarNrB5cDPda7tB5bu8XT1dIOhe283lX7ic0tWlyVdm\nFUTp6hLv+e57pizXvba76s2I6e3Xmq/ROufaTiPtX3bbZVx666U11zllGSh/2iRTXFmc/MTJBHWo\noZuVAJtu3jRl+yPoXNU5pZ5NN2+aUsfkO7sGt1G1Y6GyD109XU29WVltnbPVPH17Vx6TUH3Z2dpv\nxrrrrrPCxLkzsf/rnXv19t2GqzZw2W2XlZfJdPV08a7b31XzuK3V5ynHUBPOr0a3bbV+NPtYyyvv\np03OovxRwU7gSeB9EfFirflb8WkTM7PFrNFPm+T6nHdEPATkbtTMzFrL37A0M0uQw9vMLEEObzOz\nBDm8zcwS5PA2M0uQw9vMLEEObzOzBDm8zcwS5PA2M0uQw9vMLEEObzOzBDm8zcwS5PA2M0uQw9vM\nLEEObzOzBOX6e96SdgO/AI4Ahxv5g+GN2nHNDoY/Mwx1xohQh9j4gY1sunkTI9tGuPv9d3P00Gsj\nO/f199Hzhh52Du6cMWhpV08XZ/zRGTz8hYcZf3nqyNFr1q/hwK4DU9qaumLoXNnJoV8eQgURR2Ly\n58QIGwdfODijzWsfvXayf9VqqmWi1kfvenRKu+oQcTQorixy+OBh4mjt9rp6urj44xcDsP0D2yf7\nrA6x7p+v48CuA4ztmfsAqp2rOjn5zSfz1PeemtxvheUFOpZ1zNi+jSquLLLsuGUztumEiW3f0dkx\nZZ9pmTiu+7iayzXbxP7o6uni1V+8OqWWwvICR16dOkp6V08XK399Jc//6PkZbU32+cBBuk/rZvVv\nrWb3fburHjOdqzp507vfNOP4KK4scmT8yNTjWNQ8rwrLC3Su6uTgCwcnt2n32u7J4b2GtgwxtneM\n4orqx1u9/TQrQd+FfeXjcNo6VBDrLljHTx/6adXjf7Y+zZi3XhnT+r3hqg2MbBsp933PWN11VWbS\nfMg7ks5uoBQRM4+0KuY6ks6Oa3Yw/OnGluvr7+OpoacaXtd8W7N+Dev+2bqG+9csKmQHcP4B4c3o\nKHYgiSOHjtSfeREprihy5nvP5OHbH2b8lcbegDQ6hOCERkfSWVDh/dFlH839jjRFE6/sZrbwzfV8\nVUFcf/j6xpdrMLzzXvMO4NuSdkoaqLHiAUnDkoZHR0fzrn/qShZ5sC32/pktJnM9X+frPM8b3udH\nxDnAxcC1kt42fYaIGIyIUkSUent751TM5Mjji9Ri75/ZYjLX83W+zvNc4R0Rz2Y/9wPfAM5tRTEb\nBzY2vExff18LKmm+NevXzKl/zaKCyjdczBrQUeyg0FlodxnzrriiyMaBjRRXFBtedr7O87rhLWml\npOMnHgN/ADzSimI23byJ0tWlXCGjDlG6usR7vvseLv+by+nonNqVvv4+SleXqr4KdvV0Ubq6RHHl\nzB2zZv2aGW1NXXH5Dj+89go78bOrp2vyEyfT27z20Wsn+9fIK/NErdPbVUe5jeLK4uTj2dp41+3v\n4vIvXj6lz+oQff19dK/tzl1PNZ2rOssvohVlFJYXqm7fRhVXFqtu0wkT23L6PtMyzbpcs03sg66e\nrhm1FJbPDL+uni7WrF9Tta3JPgu613bT199X85jpXNVZ9fgorizOPI5nOUwKywuTbUysq3ttN5fd\ndhmX3npp+RhR7eOt3n6alXjtOJy2DhXKx2it4z9vVuQqo6Lfmwc3s+nmTWwe3Pza+VGnmYlMWjCf\nNpF0OuV321D+aOGXImLrbMvM9YalmdlS1egNy7qf846IJ4Ezj6kqMzNrKn/D0swsQQ5vM7MEObzN\nzBLk8DYzS5DD28wsQQ5vM7MEObzNzBLk8DYzS5DD28wsQQ5vM7MEObzNzBLk8DYzS5DD28wsQQ5v\nM7MEObzNzBLk8DYzS1DdwRgmSCoAw8AzEXFJswv5aOdHiXGPrm5m
6ZuP4dAaeef9QeCxVhTh4Daz\nxWT408PsuGZHS9eRK7wlnQJsAj7biiIc3Ga22Owc3NnS9vO+874J+DBwtNYMkgYkDUsaHh0dbUpx\nZmapiiOtfVNaN7wlXQLsj4hZX0YiYjAiShFR6u3tbVqBZmYpUkEtbT/PO+/zgXdK2g18GbhQ0t80\nswgVW9tJM7P5tnFgY0vbrxveEXFdRJwSEeuAK4DvRcSfNLOI6w9d7wA3s0VjPj5tkvujgq12/aHr\n212CmVkyGgrviLgPuK8llZiZWW7+hqWZWYIc3mZmCXJ4m5klyOFtZpYgh7eZWYIc3mZmCXJ4m5kl\nyOFtZpYgh7eZWYIc3mZmCXJ4m5klyOFtZpYgh7eZWYIc3mZmCXJ4m5klyOFtZpaguoMxSDoO+Dtg\neTb/VyPihmYXcqNubHaTZmZtc0PzY3KKPO+8XwUujIgzgbOAiySd18wiHNxmtti0OtfqvvOOiAB+\nmf1azP5FK4syM7PZ5brmLakg6SFgP/CdiLi/yjwDkoYlDY+Ojja7TjMzq5ArvCPiSEScBZwCnCvp\njVXmGYyIUkSUent7m12nmZlVaOjTJhHxEuXR4y9qSTVmZpZL3fCW1CvphOxxF/B24MfNLKLVd2XN\nzOZbq3Ot7g1L4CTgdkkFymF/V0Tc0+xCHOBmZvnl+bTJD4Gz56EWMzPLyd+wNDNLkMPbzCxBDm8z\nswQ5vM3MEuTwNjNLkMPbzCxBDm8zswQ5vM3MEuTwNjNLkMPbzCxBDm8zswQ5vM3MEuTwNjNLkMPb\nzCxBDm8zswTV/Xvekk4FvgD8BnAUGIyIjze7kBt1Y7ObNDNrm1YPMJPnnfdh4EMR8bvAecC1ktY3\nswgHt5ktNq3OtbrhHRHPRcSD2eNfAI8BJ7e0KjMzm1VD17wlraM8JNr9VZ4bkDQsaXh0dLQ51ZmZ\nWVW5w1vSKuBrwJ9HxM+nPx8RgxFRiohSb29vM2s0M7NpcoW3pCLl4N4WEV9vbUlmZlZP3fCWJOBz\nwGMR8detKKLVd2XNzOZbq3Ot7kcFgfOBdwMjkh7Kpv1FRHyrmYU4wM3M8qsb3hHxfwDNQy1mZpaT\nv2FpZpYgh7eZWYIc3mZmCXJ4m5klyOFtZpYgh7eZWYIc3mZmCXJ4m5klyOFtZpYgh7eZWYIc3mZm\nCXJ4m5klyOFtZpYgh7eZWYIc3mZmCXJ4m5klqO5gDJJuBS4B9kfEG1tVyI26sVVNm5nNu1aPDpbn\nnffngYtaWYSD28wWm1bnWt3wjoi/Aw60tAozM2tI0655SxqQNCxpeHR0tFnNmplZFU0L74gYjIhS\nRJR6e3ub1ayZmVXhT5uYmSVoQYR3q+/KmpnNt1bnWp6PCt4BXACskbQPuCEiPtfsQhzgZmb51Q3v\niLhyPgoxM7P8FsRlEzMza4zD28wsQQ5vM7MEObzNzBLk8DYzS5DD28wsQQ5vM7MEObzNzBLk8DYz\nS5DD28wsQQ5vM7MEObzNzBLk8DYzS5DD28wsQQ5vM7ME5QpvSRdJelzSLkkfaXVRZmY2uzwj6RSA\nTwG/D+wDHpD0zYj4UTMLuVE3NrM5M7O2avXoYHneeZ8L7IqIJyPiEPBl4NJmFuHgNrPFptW5lie8\nTwaervh9XzbNzMzaJE94q8q0mDGTNCBpWNLw6OjosVdmZmY15QnvfcCpFb+fAjw7faaIGIyIUkSU\nent7m1WfmZlVkSe8HwB+W1KfpE7gCuCbrS3LzMxmUze8I+Iw8G+BvwUeA+6KiEebWUSr78qamc23\nVuda3Y8KAkTEt4BvtbIQB7iZWX7+hqWZWYIc3mZmCXJ4m5klyOFtZpYgh7eZWYIUMePLksfeqDQK\n7Jnj4muA55tYTgqWYp/B/V5qlmK/G+nz2ojI/Q3HloT3sZA0HBGldtcxn5Zin8H9bncd820p9ruV\nffZlEzOzBDm8zcwStBDDe7Dd
BbTBUuwzuN9LzVLsd8v6vOCueZuZWX0L8Z23mZnV4fA2M0vQggnv\nxTBCvaRbJe2X9EjFtNWSviPpJ9nPE7PpkvSJrL8/lHROxTLvzeb/iaT3VkzfKGkkW+YTkqqNcjSv\nJJ0q6X9LekzSo5I+mE1f7P0+TtIPJD2c9fvGbHqfpPuzPtyZ/Q18JC3Pft+VPb+uoq3rsumPS/rD\niukL8pyQVJD0j5LuyX5fCn3enR2DD0kazqa19xiPiLb/AwrAE8DpQCfwMLC+3XXNoR9vA84BHqmY\n9lfAR7LHHwH+a/b4HcC9lIeZOw+4P5u+Gngy+3li9vjE7LkfAG/JlrkXuHgB9Pkk4Jzs8fHA/wPW\nL4F+C1iVPS4C92f9uQu4Ipv+GeDq7PE1wGeyx1cAd2aP12fH+3KgLzsPCgv5nAD+PfAl4J7s96XQ\n593AmmnT2nqMt32jZIW/Bfjbit+vA65rd11z7Ms6pob348BJ2eOTgMezx7cAV06fD7gSuKVi+i3Z\ntJOAH1dMnzLfQvkH/C/g95dSv4EVwIPAmyl/m25ZNn3yuKY8mMlbssfLsvk0/VifmG+hnhOUh0Ec\nAi4E7sn6sKj7nNWym5nh3dZjfKFcNlnMI9T/ekQ8B5D9/LVseq0+zzZ9X5XpC0b23+KzKb8LXfT9\nzi4fPATsB75D+V3jS1EefQqm1jrZv+z5MaCHxrdHu90EfBg4mv3ew+LvM5QHXf+2pJ2SBrJpbT3G\nc42kMw9yjVC/yNTqc6PTFwRJq4CvAX8eET+f5ZLdoul3RBwBzpJ0AvAN4HerzZb9bLR/1d5YtbXf\nki4B9kfETkkXTEyuMuui6XOF8yPiWUm/BnxH0o9nmXdejvGF8s471wj1ifqZpJMAsp/7s+m1+jzb\n9FOqTG87SUXKwb0tIr6eTV70/Z4QES8B91G+vnmCpIk3RZW1TvYve74bOEDj26OdzgfeKWk38GXK\nl05uYnH3GYCIeDb7uZ/yC/W5tPsYb/e1pIrrYU9SvnkxcaPijHbXNce+rGPqNe//xtSbGn+VPd7E\n1JsaP8imrwaeonxD48Ts8ersuQeyeSduarxjAfRXwBeAm6ZNX+z97gVOyB53AX8PXAJ8hak3767J\nHl/L1Jt3d2WPz2DqzbsnKd+4W9DnBHABr92wXNR9BlYCx1c8/gfgonYf420/CCo20Dsof1LhCWBL\nu+uZYx/uAJ4Dxim/mv5rytf4hoCfZD8ndpaAT2X9HQFKFe28H9iV/XtfxfQS8Ei2zCfJviHb5j7/\nU8r/xfsh8FD27x1LoN9vAv4x6/cjwPXZ9NMpf3JgVxZqy7Ppx2W/78qeP72irS1Z3x6n4lMGC/mc\nYGp4L+o+Z/17OPv36ERd7T7G/fV4M7MELZRr3mZm1gCHt5lZghzeZmYJcnibmSXI4W1mliCHt5lZ\nghzeZmYJ+v8In1x3CuZ2NgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x130549630>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(range(train_data.shape[0]), train_data[\"bedrooms\"].values,color='purple')\n",
    "plt.title(\"bedrooms\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 405,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only listings with fewer than 7 bedrooms (drops rows with 7 or more).\n",
    "# NOTE(review): the same filter is applied to test_data, which removes test rows\n",
    "# outright -- confirm that no prediction is needed for the dropped listings.\n",
    "train_data = train_data.loc[train_data.bedrooms.lt(7)]\n",
    "test_data = test_data.loc[test_data.bedrooms.lt(7)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## building_id "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 406,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count     49342\n",
       "unique     7580\n",
       "top           0\n",
       "freq       8285\n",
       "Name: building_id, dtype: object"
      ]
     },
     "execution_count": 406,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data['building_id'].describe()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 407,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0                                   8285\n",
      "96274288c84ddd7d5c5d8e425ee75027     275\n",
      "11e1dec9d14b1a9e528386a2504b3afc     215\n",
      "80a120d6bc3aba97f40fee8c2204524b     213\n",
      "bb8658a3e432fb62a440615333376345     212\n",
      "f68bf347f99df026f4faad43cc604048     191\n",
      "c94301249b8c09429d329864d58e5b82     167\n",
      "ce6d18bf3238e668b2bf23f4110b7b67     165\n",
      "57ef86c28a8ae482dc3a3c3af28e8e48     159\n",
      "128d4af0683efc5e1eded8dc8044d5e3     153\n",
      "d0234abbc01a982d54e8d446acc03405     152\n",
      "9c18bf871b97492b96d8ddb800591f1b     147\n",
      "8e3b8c607c3edcf3de131c24f0390179     141\n",
      "cb14c4f807f23ecee1f7469b5159d2de     141\n",
      "ea9045106c4e1fe52853b6af941f1c69     139\n",
      "7967a1280bf3f7644500fc79d2696b0e     133\n",
      "a01c99eb2cfdde327e1691e17d6696ba     131\n",
      "093f64f52a6e43ba5e8f12bec8200554     128\n",
      "18f6eb16d2f3e9885cb4a5d0a40791c6     127\n",
      "5565db9b7cba3603834c4aa6f2950960     126\n",
      "dd7b281c0dbafc8b53575ab78cca75d4     125\n",
      "3a956bd42c50f06ac84cf072fc514f5f     123\n",
      "6ce872b483cfcbb32ea805604d44ef5f     120\n",
      "ec12b4db154a9c75983f5e32edd93843     116\n",
      "aed67db466854ec2aa5d249a032bc401     115\n",
      "93cd24891f8423d45ac587ab1fdb1225     115\n",
      "80911ce8a425daf4989ea8a4bccc41a7     112\n",
      "1be9c496f7bdff4a552e3d4e91f915f8     111\n",
      "300d27d8ba2adbcbc8c6f2bcbc1c6f9d     110\n",
      "c40e3e74475a91aae1928541be8df678     108\n",
      "                                    ... \n",
      "c5da4b55c7d7685577af59a9e3a5d4be       1\n",
      "fe64f41b283f5b64cf5696b9c73d7a04       1\n",
      "e531c29431396433497042dc630d471f       1\n",
      "1bdcb3d229819613bb97f4eae7422a22       1\n",
      "a844d388fcbee4dfbc923c1c506b05a9       1\n",
      "10f73dc2b43c1a190b158eb7bcf81b37       1\n",
      "5cd233bd4edf2ab1bfb045b4dcb14000       1\n",
      "235b39999f4c354347991e5ed09db917       1\n",
      "c8a1f3fefa093254299108da6757f68e       1\n",
      "696ea5ac2266f1bd5395f83366d3a6c7       1\n",
      "1ff39d15249d4fc0fc29202f37caabd5       1\n",
      "d4b04a66ff12c26aab4c187330681027       1\n",
      "a41273c885436b56a6d01dffab3ac15f       1\n",
      "e9c5b16ebe5e641f503e4863f6ba9301       1\n",
      "6a43f359de2b550a4510ec7507235caa       1\n",
      "95cce1e3dbfd0fcbd2d62e08dc689e5b       1\n",
      "b7ae715069cee8542cd22a0c563ccd2c       1\n",
      "b632ec8fb986a25ef1673f39a752b0a1       1\n",
      "c458d6d6e10306034cb8086b16eff568       1\n",
      "b9a2d406ffdfd7c6f014671e7b0d5ea9       1\n",
      "91b61f37fba3aeddebe8de7855d36931       1\n",
      "962c7157538ec2042dedf7f72bb32235       1\n",
      "2ed7082dd069e9b0ecb93de2135b1e1d       1\n",
      "cbea8868eb2a37bc3db5ff0d699c600a       1\n",
      "32fb8d85fe478b547f3deeb55e81ab5b       1\n",
      "c92acaa39aea49bc5ea36f76ccd88509       1\n",
      "48ffbcd5ffefbdb589b13f432214eb60       1\n",
      "0eeb1c3a83415caecbc6aa57a003f02e       1\n",
      "5397b068a759397e555f401ec136b952       1\n",
      "06329938c684b97638e47e45aa9b589b       1\n",
      "Name: building_id, Length: 7580, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "idcounts= train_data['building_id'].value_counts()\n",
    "print idcounts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "building_id 共有 7580 个不同取值，其中取值为 0 的记录有 8285 条；这里直接删除 building_id 这一列。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 408,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Remove the building_id column from both the training and the test frame.\n",
    "train_data = train_data.drop(['building_id'], axis='columns')\n",
    "test_data = test_data.drop(['building_id'], axis='columns')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 409,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,1,u'price')"
      ]
     },
     "execution_count": 409,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAEICAYAAAB4YQKYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFWFJREFUeJzt3X+MXWd95/H3F9v5Vco4Tgyb+kcc\nFlfCqVtKRmmq7K5QpiJ2AnWESmuUFi9NNRUECbSrLUldbZQW7wIrLQHxS1aT4iAXJ0tTEadBqTtQ\ntdVCkjEJTIybZkhie2QvNrUzQAOJHb77x33GXE/GM/dOHvvMj/dLuppzvuc55zyPfed+5pznjB2Z\niSRJr9Srmu6AJGluMFAkSVUYKJKkKgwUSVIVBookqQoDRZJUhYEiNSQi/mNEPNl0P6Rawt9DkSTV\n4BWK1ICIWNh0H6TaDBSpooh4NiJujYjvRMSxiPiLiDgvIt4SESMR8aGI+H/AX4zV2vZdERH3RcSR\niPjXiPhU27bfj4i95ZgPRcSljQxQmoSBItV3I3At8O+BXwT+pNT/HbAEuBTob98hIhYADwD7gFXA\nMmBH2XYD8MfAO4ClwD8CXzzDY5C6ZqBI9X0qMw9k5lFgC/CuUv8pcFtmvpCZPx63z5XALwD/LTP/\nLTN/kpn/VLb9IfA/M3NvZp4A/gfwJq9SNNMYKFJ9B9qW99EKCoAjmfmT0+yzAthXAmO8S4FPRMRz\nEfEccBQIWlcx0ozhxKBU34q25ZXAwbI82SOVB4CVEbFwglA5AGzJzO0V+yhV5xWKVN/NEbE8IpbQ\nmvu4p4N9HgEOAR+JiJ8rE/lXl22fA26NiMsBIqInIt55RnouvQIGilTfXwJ/CzxdXh+eaofMfAl4\nO/AGYD8wAvxO2fbXwEeBHRHxA+AJYP0Z6bn0CviLjVJFEfEs8AeZ+XdN90U627xCkSRVYaBIkqrw\nlpckqQqvUCRJVcyb30O5+OKLc9WqVU13Q5Jmld27d38/M5d20nbeBMqqVasYHBxsuhuSNKtExL5O\n23rLS5JUhYEiSarCQJEkVWGgSJKqMFAkSVXMm6e8pIkMbR9iYPMAo/tH6VnZQ9+WPtbeuLbpbkmz\nkoGieWto+xA7+3dy/PnjAIzuG2Vn/04AQ0WaBm95ad4a2DxwMkzGHH/+OAObBxrqkTS7GSiat0b3\nj3ZVlzQ5A0XzVs/Knq7qkiZnoGje6tvSx6ILFp1SW3TBIvq29DXUI2l2c1Je89bYxLtPeUl1GCia\n19beuNYAkSrxlpckqQoDRZJUhYEiSarCQJEkVWGgSJKqMFAkSVUYKJKkKgwUSVIVBookqQoDRZJU\nhYEiSarCQJEkVWGgSJKqMFAkSVV0HCgRsSAiHouIB8r6ZRHxcEQ8FRH3RMQ5pX5uWR8u21e1HePW\nUn8yIq5tq68rteGIuKWt3vU5JEnN6OYK5QPA3rb1jwIfz8zVwDHgplK/CTiWmW8APl7aERFrgI3A\n5cA64DMlpBYAnwbWA2uAd5W2XZ9DktScjgIlIpYD1wN/XtYDuAb4UmmyDbihLG8o65TtfaX9BmBH\nZr6Qmc8Aw8CV5TWcmU9n5ovADmDDNM8hSWpIp1codwB/BPy0rF8EPJeZJ8r6CLCsLC8DDgCU7aOl\n/cn6uH1OV5/OOU4REf0RMRgRg0eOHOlwqJKk6ZgyUCLibcDhzNzdXp6gaU6xrVZ9qvP/rJC5NTN7\nM7N36dKlE+wiSaqlk/9T/mrgNyPiOuA84DW0rlgWR8TCcoWwHDhY2o8AK4CRiFgI9ABH2+pj2veZ\nqP79aZxDktSQKa9QMvPWzFyematoTap/NTNvBL4G/FZptgn4clm+v6xTtn81M7PUN5YntC4DVgOP\nAI8Cq8sTXeeUc9xf9un2HJKkhnRyhX
I6HwJ2RMSHgceAO0v9TuALETFM66phI0Bm7omIe4HvACeA\nmzPzJYCIeD/wELAAuCsz90znHJKk5sR8+cG+t7c3BwcHm+6GJM0qEbE7M3s7aetvykuSqjBQJElV\nGCiSpCoMFElSFQaKJKkKA0WSVIWBIkmqwkCRJFVhoEiSqjBQJElVGCiSpCoMFElSFQaKJKkKA0WS\nVIWBIkmqwkCRJFVhoEiSqjBQJElVGCiSpCoMFElSFQaKJKkKA0WSVIWBIkmqwkCRJFVhoEiSqjBQ\nJElVGCiSpCoMFElSFQaKJKkKA0WSVIWBIkmqwkCRJFVhoEiSqjBQJElVGCiSpCqmDJSIOC8iHomI\nb0XEnoi4vdQvi4iHI+KpiLgnIs4p9XPL+nDZvqrtWLeW+pMRcW1bfV2pDUfELW31rs8hSWpGJ1co\nLwDXZOavAG8C1kXEVcBHgY9n5mrgGHBTaX8TcCwz3wB8vLQjItYAG4HLgXXAZyJiQUQsAD4NrAfW\nAO8qben2HJKk5kwZKNnyo7K6qLwSuAb4UqlvA24oyxvKOmV7X0REqe/IzBcy8xlgGLiyvIYz8+nM\nfBHYAWwo+3R7DklSQzqaQylXEo8Dh4FdwHeB5zLzRGkyAiwry8uAAwBl+yhwUXt93D6nq180jXOM\n73d/RAxGxOCRI0c6GaokaZo6CpTMfCkz3wQsp3VF8caJmpWvE10pZMX6ZOc4tZC5NTN7M7N36dKl\nE+wiSaqlq6e8MvM54O+Bq4DFEbGwbFoOHCzLI8AKgLK9BzjaXh+3z+nq35/GOSRJDenkKa+lEbG4\nLJ8P/AawF/ga8Ful2Sbgy2X5/rJO2f7VzMxS31ie0LoMWA08AjwKrC5PdJ1Da+L+/rJPt+eQJDVk\n4dRNuATYVp7GehVwb2Y+EBHfAXZExIeBx4A7S/s7gS9ExDCtq4aNAJm5JyLuBb4DnABuzsyXACLi\n/cBDwALgrszcU471oW7OIUlqTsyXH+x7e3tzcHCw6W5I0qwSEbszs7eTtv6mvCSpCgNFklSFgSJJ\nqsJAkSRVYaBIkqowUCRJVRgokqQqDBRJUhUGiiSpCgNFklSFgSJJqsJAkSRVYaBIkqowUCRJVRgo\nkqQqDBRJUhUGiiSpCgNFklSFgSJJqsJAkSRVYaBIkqowUCRJVRgokqQqDBRJUhUGiiSpCgNFklSF\ngSJJqsJAkSRVYaBIkqowUCRJVRgokqQqDBRJUhUGiiSpCgNFklSFgSJJqmLKQImIFRHxtYjYGxF7\nIuIDpb4kInZFxFPl64WlHhHxyYgYjohvR8Sb2461qbR/KiI2tdWviIihss8nIyKmew5JUjM6uUI5\nAfzXzHwjcBVwc0SsAW4BBjJzNTBQ1gHWA6vLqx/4LLTCAbgN+DXgSuC2sYAobfrb9ltX6l2dQ5LU\nnCkDJTMPZeY3y/IPgb3AMmADsK002wbcUJY3AHdnyzeAxRFxCXAtsCszj2bmMWAXsK5se01mfj0z\nE7h73LG6OYckqSFdzaFExCrgV4GHgddl5iFohQ7w2tJsGXCgbbeRUpusPjJBnWmcY3x/+yNiMCIG\njxw50s1QJUld6jhQIuLVwF8BH8zMH0zWdIJaTqM+aXc62Sczt2Zmb2b2Ll26dIpDSpJeiY4CJSIW\n0QqT7Zl5Xyl/b+w2U/l6uNRHgBVtuy8HDk5RXz5BfTrnkCQ1pJOnvAK4E9ibmf+7bdP9wNiTWpuA\nL7fV312exLoKGC23qx4C3hoRF5bJ+LcCD5VtP4yIq8q53j3uWN2cQ5LUkIUdtLka+D1gKCIeL7U/\nBj4C3BsRNwH7gXeWbQ8C1wHDwPPAewAy82hE/BnwaGn3p5l5tCy/F/g8cD7wlfKi23NIkpoTrQer\n5r7e3t4cHBxsuhuSNKtExO7M7O2krb8pL0mqwkCRJFVhoEiSqjBQJElVGCiSpCoMFElSFQaKJKkK\nA0
WSVIWBIkmqwkCRJFVhoEiSqjBQJElVGCiSpCoMFElSFQaKJKkKA0WSVIWBIkmqwkCRJFVhoEiS\nqjBQJElVGCiSpCoMFElSFQaKJKkKA0WSVIWBIkmqwkCRJFVhoEiSqjBQJElVLGy6A5oZhrYPMbB5\ngNH9o/Ss7KFvSx9rb1zbdLckzSIGihjaPsTO/p0cf/44AKP7RtnZvxPAUJHUMW95iYHNAyfDZMzx\n548zsHmgoR5Jmo0MFDG6f7SruiRNxFteomdlD6P7Xh4ePSt7GuiNzobJ5sycT9N0GSiib0vfKXMo\nAIsuWETflr4Ge6UzZbI5M8D5NE2bgaKTHxT+VDo/TDVndrptvh80lSkDJSLuAt4GHM7MXyq1JcA9\nwCrgWeC3M/NYRATwCeA64HngP2fmN8s+m4A/KYf9cGZuK/UrgM8D5wMPAh/IzJzOOTR9a29c6wfG\nPDGdOTPn09SJTiblPw+sG1e7BRjIzNXAQFkHWA+sLq9+4LNwMoBuA34NuBK4LSIuLPt8trQd22/d\ndM4hqTOnmxvrWdkz6TZpKlMGSmb+A3B0XHkDsK0sbwNuaKvfnS3fABZHxCXAtcCuzDyamceAXcC6\nsu01mfn1zEzg7nHH6uYckjrQt6WPRRcsOqU2Nmc22TZpKtOdQ3ldZh4CyMxDEfHaUl8GHGhrN1Jq\nk9VHJqhP5xyHxncyIvppXcWwcuXKLocozU2dzJk5n6bpqD0pHxPUchr16Zzj5cXMrcBWgN7e3qmO\nK80bk82ZOZ+m6ZruLzZ+b+w2U/l6uNRHgBVt7ZYDB6eoL5+gPp1zSJIaNN1AuR/YVJY3AV9uq787\nWq4CRsttq4eAt0bEhWUy/q3AQ2XbDyPiqvL01rvHHaubc0iSGtTJY8NfBN4CXBwRI7Se1voIcG9E\n3ATsB95Zmj9I63HeYVqP9L4HIDOPRsSfAY+Wdn+amWMT/e/lZ48Nf6W86PYckqRmRevhqrmvt7c3\nBwcHm+6GJM0qEbE7M3s7aes/DilJqsJAkSRVYaBIkqowUCRJVRgokqQqDBRJUhUGiiSpCgNFklSF\ngSJJqsJAkSRVYaBIkqowUCRJVRgokqQqDBRJUhUGiiSpCgNFklSFgSJJqsJAkSRVYaBIkqowUCRJ\nVRgokqQqDBRJUhUGiiSpCgNFklSFgSJJqsJAkSRVYaBIkqowUCRJVRgokqQqFjbdAc18Q9uHGNg8\nwOj+UXpW9tC3pY+1N65tuluSZhgDRZMa2j7Ezv6dHH/+OACj+0bZ2b8TwFA5QwxwzVYGyhxU8wNp\nYPPAyTAZc/z54wxsHvBD7gw4kwFuUM0tM/Hv00CZY2p/II3uH+2qrlfmlQT4ZB8wNd4XM/EDbL6a\nqXcOIjMbO/nZ1Nvbm4ODg2f9vK/0m/Bv3vc37N66m3wpiQXBFf1XcP1nrj9t+ztW3cHovpd/2MeC\nIH+aU/ZhfH9f/NGL/Phff/yydj2X9vDBZz/Y8Ti6VfPD6+Sx9o22/hxeSnouPTsfiN2O4/ZX3Q6n\n+ZacrM/jP2AAFl2wiLdvfTtrb1zLxy7+2Gn/Hvu29J3s4/lLzufET05w/N9axzn/ovNZ/4n1AJMe\nX53p5P3QSZvTfZ+fie/LiNidmb0dtTVQJnf3b9zNMwPPnIEeSdJZFPCOL7yj6x8AugmUWfvYcESs\ni4gnI2I4Im45E+e4PW43TCTNDQn3/e59DG0fOmOnmJWBEhELgE8D64E1wLsiYk3Nc9wet9c8nCTN\nCPf97n1n7NizMlCAK4HhzHw6M18EdgAbGu6TJM1rszVQlgEH2tZHSu0UEdEfEYMRMXjkyJGz1jlJ\nmo9ma6DEBLWXPV2QmVszszcze5cuXXoWuiVJ89dsDZQRYEXb+nLgYEN9kSQxewPlUWB1RFwWEecA\nG4H7a57gtryt5uEkaUY4k59ts/I35TPzRES8H3gIWADclZl7ap/H
UJGkzs3KQAHIzAeBB5vuhySp\nZbbe8pIkzTAGiiSpCgNFklSFgSJJqmLe/GvDEXEE2DfN3S8Gvl+xO7OF454/5uOYwXF34tLM7Og3\nw+dNoLwSETHY6T/fPJc47vljPo4ZHHft43rLS5JUhYEiSarCQOnM1qY70BDHPX/MxzGD467KORRJ\nUhVeoUiSqjBQJElVGChTiIh1EfFkRAxHxC1N96dbEXFXRByOiCfaaksiYldEPFW+XljqERGfLGP9\ndkS8uW2fTaX9UxGxqa1+RUQMlX0+GRET/ednZ11ErIiIr0XE3ojYExEfKPU5O/aIOC8iHomIb5Ux\n317ql0XEw6X/95T/8oGIOLesD5ftq9qOdWupPxkR17bVZ+z3Q0QsiIjHIuKBsj7nxx0Rz5b34OMR\nMVhqzb3HM9PXaV60/mn87wKvB84BvgWsabpfXY7hPwFvBp5oq30MuKUs3wJ8tCxfB3yF1v+IeRXw\ncKkvAZ4uXy8syxeWbY8Av172+Qqwvukxl35dAry5LP888C/Amrk89tKPV5flRcDDZSz3AhtL/XPA\ne8vy+4DPleWNwD1leU15r58LXFa+BxbM9O8H4L8Afwk8UNbn/LiBZ4GLx9Uae497hTK5K4HhzHw6\nM18EdgAbGu5TVzLzH4Cj48obgG1leRtwQ1v97mz5BrA4Ii4BrgV2ZebRzDwG7ALWlW2vycyvZ+vd\nd3fbsRqVmYcy85tl+YfAXmAZc3jspe8/KquLyiuBa4Avlfr4MY/9WXwJ6Cs/gW4AdmTmC5n5DDBM\n63thxn4/RMRy4Hrgz8t6MA/GfRqNvccNlMktAw60rY+U2mz3usw8BK0PXuC1pX668U5WH5mgPqOU\nWxq/Susn9jk99nLb53HgMK0Phu8Cz2XmidKkvZ8nx1a2jwIX0f2fxUxwB/BHwE/L+kXMj3En8LcR\nsTsi+kutsff4rP0Pts6Sie4XzuXnrE833m7rM0ZEvBr4K+CDmfmDSW4Bz4mxZ+ZLwJsiYjHw18Ab\nJ2pWvnY7tol+AG18zBHxNuBwZu6OiLeMlSdoOqfGXVydmQcj4rXAroj450nanvH3uFcokxsBVrSt\nLwcONtSXmr5XLmcpXw+X+unGO1l9+QT1GSEiFtEKk+2ZeV8pz4uxZ+ZzwN/Tule+OCLGfnhs7+fJ\nsZXtPbRuj3b7Z9G0q4HfjIhnad2OuobWFctcHzeZebB8PUzrB4grafI93vSk0kx+0bqCe5rWBN3Y\nZNzlTfdrGuNYxamT8v+LUyftPlaWr+fUSbtHSn0J8AytCbsLy/KSsu3R0nZs0u66psdb+hW07vne\nMa4+Z8cOLAUWl+XzgX8E3gb8H06dnH5fWb6ZUyen7y3Ll3Pq5PTTtCamZ/z3A/AWfjYpP6fHDfwc\n8PNty/8XWNfke7zxN8BMf9F6MuJfaN2L3tx0f6bR/y8Ch4DjtH7iuInW/eIB4KnydezNE8Cny1iH\ngN624/w+rUnKYeA9bfVe4Imyz6co//pC0y/gP9C6PP828Hh5XTeXxw78MvBYGfMTwH8v9dfTelpn\nuHzInlvq55X14bL99W3H2lzG9SRtT/bM9O8HTg2UOT3uMr5vldeesX41+R73n16RJFXhHIokqQoD\nRZJUhYEiSarCQJEkVWGgSJKqMFAkSVUYKJKkKv4/Yeeusknr+8cAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x12fc86518>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scatter each training listing's price against its row position to expose outliers.\n",
    "plt.scatter(\n",
    "    range(len(train_data)),\n",
    "    train_data[\"price\"].values,\n",
    "    color='purple',\n",
    ")\n",
    "plt.title(\"price\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 410,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Treat listings priced above 60000 as outliers: keep only rows at or below that cap.\n",
    "# NOTE(review): the same cap also removes rows from test_data -- confirm that dropping\n",
    "# test rows is intended.\n",
    "train_data = train_data.loc[train_data['price'].le(60000)]\n",
    "test_data = test_data.loc[test_data['price'].le(60000)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## latitude、longitude"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 411,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAEpVJREFUeJzt3X+QXWV9x/H312RBaXBBExQSQrBG\nCzYDykrjWKtmwWKRH7VqsUzJtEwzA9ZRq4NC2qGZaVqoRWhHpE3FMY6ZIlUphGAVV9G2I6G7/HCN\nSBMsgcivpMoK0mIC3/5xz8om2d17d/fu/fHk/ZrZ2Xuec+6e75OT+9mzz33uOZGZSJK63wvaXYAk\nqTkMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoUh0R8aaIuK/ddUj1GOjqChHxQESc0qJ9ZUS8cnQ5\nM/8tM1/din1LM2GgS1IhDHR1tYj4o4jYFhE/joibIuKoMeteExG3Vusei4hLqvaTI+I7EfFERDwS\nEZ+MiIOqdd+unn5PRDwVEb8bEW+JiB1jfu5xEXFb9fwtEXHmmHWfjYirI2JTRDwZEZsj4pdb9M+h\nA5yBrq4VESuAvwLeAxwJbAeuq9YdCnwd+FfgKOCVwED11GeBDwHzgTcA/cCFAJn5G9U2J2TmvMz8\nwj777AE2Al8DjgDeD2yIiLFDMu8F1gCHA9uAtU3rtDQJA13d7FzgM5l5Z2Y+A1wMvCEilgDvAB7N\nzCsy8/8y88nM3AyQmUOZeXtm7snMB4B/AN7c4D6XA/OAyzLz55n5DeBmaiE+6suZeUdm7gE2ACfO\nvKtSfXPbXYA0A0cBd44uZOZTEfE/wELgaOD+8Z4UEa8CPgH0AYdQex0MTWGfD2Xmc2Patlf7HPXo\nmMdPU/sFIM06z9DVzR4GjhldiIhfAl4K/Ah4CJho7Poa4AfA0sx8MXAJEFPY59ERMfa1s7jap9RW\nBrq6SU9EvHD0C7ge+IOIODEiDgb+EthcDaPcDLw8Ij4YEQdHxKER8WvVzzkU+CnwVET8CnDBPvt5\nDHjFBDVsBn4GXBQRPRHxFuAMqrF7qZ0MdHWTW4D/HfP1JuDPgC8Bj1A7Iz8HIDOfBE6lFraPAluB\nt1Y/5yPA7wFPAv8I7PXGJ/DnwPpqFst7xq7IzJ8DZwJvB3YBnwLOy8wfNLGf0rSEN7iQpDJ4hi5J\nhTDQJakQBrokFcJAl6RCtPSDRfPnz88lS5a0cpeS1PWGhoZ2ZeaCetu1NNCXLFnC4OBgK3cpSV0v\nIrY3sp1DLpJUCANdkgphoEtSIQx0SSqEgS5JhfB66B1ueMMwA6sHGHlwhN7FvfSv7WfZucvaXZak\nDmSgd7DhDcNsXLWR3U/vBmBk+wgbV20EMNQl7cchlw42sHrgF2E+avfTuxlYPTDBMzrL8IZhrlpy\nFWtesIarllzF8IbhdpckFc0z9A428uDIlNo7iX9dSK3nGXoH613cO6X2TtLtf11I3chA72D9a/vp\nOaRnr7aeQ3roX9vfpooa181/XUjdykDvYMvOXcYZ686g95heCOg9ppcz1p3RFUMW3fzXhdStHEPv\ncMvOXdYVAb6v/rX9e42hQ/f8dSF1KwNdTbPvnPkTVp7A1lu2OodeahEDXU0x3qyWe9bfs9cQ0aYL\nN3HDyhvIZ5OYE5y06iRO/9Tp7SxbKopj6GqKerNaNl24icFrBslnE4B8Nhm8ZpBNF25qea1SqTxD\nV1PUm9UytG5o3PVD64Z44FsPsOv7u37RNv/4+bxvy/uaX6RUOM/Q1RT1ZrWMnpnvK5/NvcIcYNf3\nd7FmzprmFigdAAx0NUW9OfMxJ6b2A5+DKxZe0azypAOCgV6wVl5Lpd6c+ZNWnTTln/nUw081u0yp\naI6hF6od11KZbM786GyWoXVDe81yGb
zGm4ZLzWKgF2qyWSftmgt++qdO32+aooEuNY+BXqhOu5bK\n1a+5er83P+cdNY++C/omDPV5R81rRWlSMRxDL1QnXUvlioVX7BfmUBsjnyzMP/yjD892aVJRPEMv\nVKdcS2XThZsafnOz55Cerrn4mNSJPEMvVCdcqXF4w/CUxsi9Xro0M56hF6zdV2qcTjh7vXRp+ho+\nQ4+IORFxV0TcXC0fGxGbI2JrRHwhIg6avTLVbYY3DDOyferh7PXSpembypDLB4B7xyxfDlyZmUuB\nnwDnN7MwNV+rPmg0Ogd+qrxeujQzDQV6RCwCTgc+XS0HsAL4YrXJeuDs2ShQzTEasiPbRyCf/6DR\nbIT6eHPgJ9Vld2OSOlWjY+hXARcBh1bLLwWeyMw91fIOYOF4T4yIVcAqgMWLF0+/Us1IKz9oNJVx\n8Evz0qbuWzqQ1Q30iHgH8HhmDkXEW0abx9l03MvpZeY6YB1AX1/f+Jfc06xr5QeNehf31h0/N8il\n5mtkyOWNwJkR8QBwHbWhlquAwyJi9BfCIuDhWalQTdHKDxqNd+XFvfZ5jG98SrOhbqBn5sWZuSgz\nlwDnAN/IzHOBbwLvqjZbCdw4a1Vqxupd3raZRufAv+ilL9pvnW98SrNnJh8s+ijwJxGxjdqY+rXN\nKUmzodUfNFp27jIu2nUR7/z8O9v64SbpQBKZrRvW7uvry8FBr64nSVMREUOZ2VdvOz/6L0mFMNAl\nqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIK\nYaBLUiEMdEkqhIEuSYWYW3+T9hreMMzA6gFGHhyhd3Ev/Wv7vYWZJI2jowN9eMMwG1dtZPfTuwEY\n2T7CxlUbAQx1SdpHRw+5DKwe+EWYj9r99G4GVg+0qSJJ6lwdHegjD45MqV2SDmQdHei9i3un1C5J\nB7KODvT+tf30HNKzV1vPIT30r+1vU0WS1Lk6+k3R0Tc+neUiSfV1dKBDLdQNcEmqr6OHXCRJjTPQ\nJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEHUDPSJeGBF3RMQ9EbElItZU\n7cdGxOaI2BoRX4iIg2a/XEnSRBo5Q38GWJGZJwAnAqdFxHLgcuDKzFwK/AQ4f/bKlCTVUzfQs+ap\narGn+kpgBfDFqn09cPasVChJakhDY+gRMSci7gYeB24F7geeyMw91SY7gIUTPHdVRAxGxODOnTub\nUbMkaRwNBXpmPpuZJwKLgJOB48bbbILnrsvMvszsW7BgwfQrlSRNakqzXDLzCeA2YDlwWESMXk99\nEfBwc0uTJE1FI7NcFkTEYdXjFwGnAPcC3wTeVW22ErhxtoqUJNXXyB2LjgTWR8Qcar8Ars/MmyPi\n+8B1EfEXwF3AtbNYpySpjrqBnpnfBV47TvsPqY2nS5I6gJ8UlaRCGOiSVAgDXZIKYaBLUiEMdEkq\nhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY\n6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEu\nSYUw0CWpEAa6JBXCQJekQhjoklQIA12SClE30CPi6Ij4ZkTcGxFbIuIDVftLIuLWiNhafT989suV\nJE2kkTP0PcCHM/M4YDnwvog4HvgYMJCZS4GBalmS1CZ1Az0zH8nMO6vHTwL3AguBs4D11WbrgbNn\nq0hJUn1TGkOPiCXAa4HNwMsy8xGohT5wxATPWRURgxExuHPnzplVK0maUMOBHhHzgC8BH8zMnzb6\nvMxcl5l9mdm3YMGC6dQoSWpAQ4EeET3UwnxDZn65an4sIo6s1h8JPD47JUqSGtHILJcArgXuzcxP\njF
l1E7CyerwSuLH55UmSGjW3gW3eCPw+MBwRd1dtlwCXAddHxPnAg8C7Z6dESVIj6gZ6Zv47EBOs\n7m9uOZKk6fKTopJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBL\nUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQV\nwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiHq\nBnpEfCYiHo+I741pe0lE3BoRW6vvh89umZKkeho5Q/8scNo+bR8DBjJzKTBQLUuS2qhuoGfmt4Ef\n79N8FrC+erweOLvJdUmSpmi6Y+gvy8xHAKrvR0y0YUSsiojBiBjcuXPnNHcnSapn1t8Uzcx1mdmX\nmX0LFiyY7d1J0gFruoH+WEQcCVB9f7x5JUmSpmO6gX4TsLJ6vBK4sTnlSJKmq5Fpi/8EfAd4dUTs\niIjzgcuAUyNiK3BqtSxJaqO59TbIzPdOsKq/ybVIkmbAT4pKUiEMdEkqhIEuSYUw0CWpEAa6JBXC\nQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0\nSSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJek\nQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFmDuTJ0fEacDfAnOAT2fmZU2pSpIKsCbW7Nd2aV46a/ub\n9hl6RMwBrgbeDhwPvDcijm9WYZLUzcYL88nam2EmQy4nA9sy84eZ+XPgOuCs5pQlSZqqmQT6QuCh\nMcs7qra9RMSqiBiMiMGdO3fOYHeSpMnMJNBjnLbcryFzXWb2ZWbfggULZrA7SdJkZhLoO4Cjxywv\nAh6eWTmSpOmaSaD/J7A0Io6NiIOAc4CbmlOWJHW3iWazzOYsl2lPW8zMPRHxx8BXqU1b/Exmbmla\nZZLU5WYzvMczo3nomXkLcEuTapEkzYCfFJWkQhjoklQIA12SCmGgS1IhInO/zwLN3s4idgLbG9x8\nPrBrFstppZL6AmX1x750rpL6M9O+HJOZdT+Z2dJAn4qIGMzMvnbX0Qwl9QXK6o996Vwl9adVfXHI\nRZIKYaBLUiE6OdDXtbuAJiqpL1BWf+xL5yqpPy3pS8eOoUuSpqaTz9AlSVNgoEtSIdoe6BHx7ojY\nEhHPRUTfmPZTI2IoIoar7yvGrDupat8WEX8XEePdbKMtJupPte7iqub7IuI3x7SfVrVti4iPtb7q\n+iLixIi4PSLuru5AdXLVHtUx2BYR342I17W71kZFxPurf/ctEfHXY9rHPU6dLiI+EhEZEfOr5a48\nNhHx8Yj4QVXzDRFx2Jh1XXdsWvr6zsy2fgHHAa8GbgP6xrS/FjiqevyrwI/GrLsDeAO1uyZ9BXh7\nu/vRQH+OB+4BDgaOBe6ndtnhOdXjVwAHVdsc3+5+jNOvr43+OwO/Bdw25vFXqmOxHNjc7lob7M9b\nga8DB1fLR0x2nNpdbwP9OZrapay3A/O7/Ni8DZhbPb4cuLxbj02rX99tP0PPzHsz875x2u/KzNE7\nIG0BXhgRB0fEkcCLM/M7WfsX+xxwdgtLntRE/aF2A+3rMvOZzPxvYBu1G213y822E3hx9biX5+9O\ndRbwuay5HTisOkad7gLgssx8BiAzH6/aJzpOne5K4CL2vg1kVx6bzPxaZu6pFm+ndjc06M5j09LX\nd9sDvUG/A9xVvfgWUrv93ahxb07dgSa6qXZDN9vuAB8EPh4RDwF/A1xctXdL/ft6FfCmiNgcEd+K\niNdX7V3Xn4g4k9pfsPfss6rr+jKOP6T2VwZ0Z39aWvOMbnDRqIj4
OvDycVatzswb6zz3NdT+7Hrb\naNM4m7V07uU0+zNR3eP9Um3LXNLJ+gX0Ax/KzC9FxHuAa4FT6IDjMZE6/ZkLHE5tKOL1wPUR8Qo6\ntD91+nIJz78+9nraOG1t7ws09hqKiNXAHmDD6NPG2b4j+jOJltbckkDPzFOm87yIWATcAJyXmfdX\nzTt4/k8waMPNqafZn8luqt0RN9uerF8R8TngA9XiPwOfrh537M3C6/TnAuDL1bDdHRHxHLULKHVk\nfybqS0QsozaefE81N2ARcGf1pnVH9gXqv4YiYiXwDqC/OkbQwf2ZREtr7tghl+qd7U3AxZn5H6Pt\nmfkI8GRELK9mt5wHTHqW3yFuAs6p3gc4FlhK7c3dbrnZ9sPAm6vHK4Ct1eObgPOqGRXLgZHqGHW6\nf6HWDyLiVdTesNrFxMepI2XmcGYekZlLMnMJtQB5XWY+Spcem4g4DfgocGZmPj1mVVcdm0prX98d\n8C7wb1P7T/gM8Bjw1ar9T4GfAXeP+RqdidAHfI/au8efpPrEayd8TdSfat3qqub7GDMzh9pshP+q\n1q1udx8m6NevA0PU3qXfDJxUtQdwdVX7MGNm9nTyF7UA/3z1/+hOYEW949QNX8ADPD/LpVuPzTZq\n486jr/u/7+Zj08rXtx/9l6RCdOyQiyRpagx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVIj/B/SL\nvbLdb/S7AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10171b588>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the raw coordinates (longitude on x, latitude on y) of the training listings.\n",
    "plt.scatter(\n",
    "    train_data[\"longitude\"].values,\n",
    "    train_data[\"latitude\"].values,\n",
    "    color='purple',\n",
    ")\n",
    "plt.title(\"Location\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 412,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    49333.000000\n",
       "mean       -73.955704\n",
       "std          1.178138\n",
       "min       -118.271000\n",
       "25%        -73.991700\n",
       "50%        -73.977900\n",
       "75%        -73.954800\n",
       "max          0.000000\n",
       "Name: longitude, dtype: float64"
      ]
     },
     "execution_count": 412,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of the longitude column (the min/max expose out-of-area values).\n",
    "train_data[\"longitude\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 413,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    49333.000000\n",
       "mean        40.741543\n",
       "std          0.638658\n",
       "min          0.000000\n",
       "25%         40.728300\n",
       "50%         40.751800\n",
       "75%         40.774300\n",
       "max         44.883500\n",
       "Name: latitude, dtype: float64"
      ]
     },
     "execution_count": 413,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of the latitude column (the min/max expose out-of-area values).\n",
    "train_data[\"latitude\"].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可看到经纬度位置点集中分布在西经74度左右，北纬40.5-41.5度之间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 414,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Bounding box of the area of interest, in decimal degrees (bounds are exclusive).\n",
    "LAT_MIN, LAT_MAX = 40.5, 41.5\n",
    "LNG_MIN, LNG_MAX = -74.1, -73.6\n",
    "\n",
    "\n",
    "def keep_inside_bbox(df):\n",
    "    \"\"\"Return a copy of ``df`` restricted to rows strictly inside the bounding box.\n",
    "\n",
    "    Rows whose latitude is not in (LAT_MIN, LAT_MAX) or whose longitude is not in\n",
    "    (LNG_MIN, LNG_MAX) are dropped; the index labels of the kept rows are preserved.\n",
    "    \"\"\"\n",
    "    inside = (\n",
    "        (df['latitude'] > LAT_MIN) & (df['latitude'] < LAT_MAX)\n",
    "        & (df['longitude'] > LNG_MIN) & (df['longitude'] < LNG_MAX)\n",
    "    )\n",
    "    # .copy() returns an independent frame, so columns can be added to it later\n",
    "    # without pandas emitting SettingWithCopyWarning.\n",
    "    return df[inside].copy()\n",
    "\n",
    "\n",
    "# Apply the identical filter to both datasets.\n",
    "# NOTE(review): this also removes rows from test_data -- confirm that is intended.\n",
    "train_data = keep_inside_bbox(train_data)\n",
    "test_data = keep_inside_bbox(test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 415,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X90XOV54PHvo/HIyAgEWOS0xrGt\nXWhSOCpprZLm0LQsoumCbAjeNiWxwYWkSnCyxQkpP0IXxQ7OEnbTKDnFUG9I1g46JUkj7NjCTRNR\nb7O0SyvFMVMgrSn+EYc0YANKhBVblp79Y+bKo9G9c++duTNzZ+7zOcfnaK7mzry6nnnue9/7vM8r\nqooxxphkaKp1A4wxxlSPBX1jjEkQC/rGGJMgFvSNMSZBLOgbY0yCWNA3xpgEsaBvTERE5J0i8i+1\nbocxxVjQNw1FRA6KyFVVei8VkQudx6r6XVV9SzXe25hSWdA3xpgEsaBvEkFE/khEXhCRV0XkmyKy\nKO93l4jIt3O/+4mIfCK3/TIR+QcReV1Efiwify4izbnf/V1u930iMi4ifyAiV4jIkbzX/WUR2ZPb\n/1kRuTbvd/9bRB4UkSER+ZmIPC0i/7FKh8MkmAV90/BE5ErgvwPvAX4ROAQ8lvvdWcB3gL8GFgEX\nAsO5XaeAjwLtwDuAbmAdgKr+Vu45l6pqq6p+teA908BO4G+ANwH/FRgQkfzhn/cCG4BzgReATZH9\n0cZ4sKBvkmA18CVV/Z6qngDuBt4hIsuAFcC/q+pnVfXnqvozVX0aQFVHVfX/qeopVT0I/AXw2wHf\n8zeAVuB+VT2pqk8Cu8gGesegqv6jqp4CBoC3lf+nGlPcvFo3wJgqWAR8z3mgquMicgy4AHgz8G9u\nO4nILwF/BnQBC8h+X0ZDvOcPVXU6b9uh3Hs6/j3v5+NkTxLGVJT19E0SvAQsdR6IyJnAQuBHwA8B\nr7H0h4AfABep6tnAJwAJ8Z5vFpH879iS3HsaUzMW9E0jSovIGc4/4GvAzSLyNhGZD3waeDo3ZLML\n+AURWS8i80XkLBF5e+51zgJ+CoyLyFuBWwve5yfAf/Bow9PAG8AdIpIWkSuAleTuJRhTKxb0TSN6\nApjI+/dO4L8B3wB+TLZnfwOAqv4M+B2yAfnfgf3Af8q9zseB9wE/A/4XMOtmLfBJYGsuO+c9+b9Q\n1ZPAtcDVwFFgM3CTqv4gwr/TmNDEFlExxpjksJ6+McYkiAV9Y4xJEAv6xhiTIBb0jTEmQWI3Oau9\nvV2XLVtW62YYY0xdGR0dPaqq5/s9L3ZBf9myZYyMjNS6GcYYU1dE5FCQ59nwjjHGJIgFfWOMSRAL\n+sYYkyAW9I0xJkEs6BtjTIIEDvoikhKRvSKyK/f4I7nl51RE2ovs99e5glS7omiwMcaY0oXp6d8G\nPJ/3+CngKrILQxTzP4AbQ7bLGGNMBQQK+iKyGOgBvuhsU9W9uXrkRanqMNnStMYYY2osaE+/H7gD\nmPZ7YilEpFdERkRk5JVXXqnEWxhjjCFA0BeRFcDLqhp0bdDQVHWLqnapatf55/vOIjbGGFOiIGUY\nLgeuFZFrgDOAs0XkUVVdU9mmGZNMQ+uGGN0yik4pkhKW9y6nZ3NPrZtlGoRvT19V71bVxaq6jOwS\nc09awDemMobWDTHy0Ag6lV3RTqeUkYdGGFo3VOOWmUZRcp6+iPyxiBwBFgPPiMgXc9u7nJ9zj78L\nfB3oFpEjIvK75TbamEY1usV9FNVruzFhhaqyqap7gD25n78AfMHlOSPAB/Iev7OsFhqTIE4PP+h2\nY8KyGbnGxIikJNR2Y8KyoG9MjCzvXR5quzFhxW4RFWOSzMnSsewdUymiGq+xwq6uLrWVs4wxJhwR\nGVXVLr/n2fCOMcYkiAV9Y4xJEAv6xh
iTIBb0jTEmQSzoG2NMgljQN8aYBLGgb4wxCWJB3xhjEsSC\nvjHGJIgFfWOMSRAL+sYYkyAW9I0xJkEs6BtjTIJY0DfGmASxevrGmFjKDGQYvmeYscNjtC1po3tT\nN52rO2vdrLpnQd8YEzuZgQw7e3cyeXwSgLFDY+zs3Qlggb9MNrxjjImd4XuGZwK+Y/L4JMP3DNeo\nRY3Dgr4xJnbGDo+F2m6Cs6BvjImdtiVtobab4CzoG2Nip3tTN+kF6Vnb0gvSdG/qrlGLGofdyDUm\nRixjJcv5m+1YRM+CvjExYRkrs3Wu7kzk311pgYd3RCQlIntFZFfu8UdE5AURURFpL7LfWhHZn/u3\nNopGG9OILGPFVEOYMf3bgOfzHj8FXAUc8tpBRM4D+oC3A5cBfSJybgntNKbhWcaKqYZAQV9EFgM9\nwBedbaq6V1UP+uz6u8C3VfVVVX0N+Dbwn0tsqzENzTJWTDUE7en3A3cA0yFf/wLgh3mPj+S2zSIi\nvSIyIiIjr7zySsi3MKYxWMaKqQbfoC8iK4CXVXW0hNcXl206Z4PqFlXtUtWu888/v4S3MSYeMgMZ\n+pf1s6FpA/3L+skMZALv27m6k5VbVtK2tA0E2pa2sXLLSruZaSIVJHvncuBaEbkGOAM4W0QeVdU1\nAfY9AlyR93gxsCdsI42pB1Fk31jGiqk0356+qt6tqotVdRlwA/BkwIAP8C3gXSJybu4G7rty24xp\nOJZ9Y+pByTNyReSPReQI2d77MyLyxdz2LudnVX0V+BTwT7l/G3PbjGk4ln1j6kGoyVmquofc8Iyq\nfgH4gstzRoAP5D3+EvClchppTD1oW9LG2KG5Ad6yb0ycWO0dYyJi2TemHlgZBmMiYvViTD2woG9M\nhCz7xsSdDe8YY0yCWNA3xpgEsaBvjDEJYkHfGGMSxIK+McYkiAV9Y4xJEAv6xhiTIBb0jTEmQSzo\nG2NMgljQN8aYBLGgb4wxCWJB3xhjEsSCvjHGJIgFfWOMSRAL+sYYkyAW9I0xJkEs6BtjTIJY0DfG\nmASx5RKNCSAzkJmz9i3Yerim/ljQN8ZHZiDDzt6dTB6fBGDs0Bg7btmBqjI9OT2zbWfvTgAL/CbW\nLOgb42P4nuGZgO+YOjk153mTxycZvmfYgn4A267axoHhAzOPO7o7uOk7N9WwRclhQd8YH2OHxoI/\n93Dw5/oZWjfE6JZRdEqRlLC8dzk9m3sie30/bkNaUZzQCgM+wIHhA2y7apsF/iqwoG9MThRBtm1J\nW+DnZgYy7L5tNxPHJgBoWdjC1Z+/ms7VnQytG2LkoZGZ5+qUzjyuRuB3G9KKaviqMOD7bTfRCpy9\nIyIpEdkrIrtyjztE5GkR2S8iXxWRZpd9mkXkyyKSEZF9InJFhG03JjJOkNUpBU4H2aF1Q4FfI70g\nPXOD109mIMP2m7fPBHyAiWMT7LhlB5mBDKNbRl3389oeNbchLWf4ytS3MD3924DngbNzjz8DfE5V\nHxORh4H3Aw8V7PNHAKraKSJvAnaLyK+r6nSZ7TYmUqN/4RFkPbY72pa2lTT8MXzP8MxN4HxTJ6cY\nvmd45uRTqHB7pYaAvIapohy+MrURKOiLyGKgB9gEfExEBLgSeF/uKVuBTzI36F8MDAOo6ssi8jrQ\nBfxj2S03JkI67RFkp5Wm5iamT84N0E3NTaw/uL6k9ysWPIMG1koOAbUtaXO9lxFm+MpLR3eH61BO\nR3dH2a9t/AUd3ukH7gCcT/5C4HVVPZV7fAS4wGW/fcB1IjJPRDqA5cCbC58kIr0iMiIiI6+88kqo\nP8CYUg2tG2KDbGCDbCj6vPMuPM91+9J3Li35vYsFz6CBtZJDQN2bukkvSM/aFmb4qpibvnPTnABv\n2TvV49vTF5EVwMuqOpo3Ji8uT3XrKn0J+GVgBDgE/D1wqvBJqroF2ALQ1dXl3uUyJkKFveRijj53\n1H
X7wT0HS37/7k3dbL95+5whnlRziu5N3Ty+9nHXIR5Jnf7qBR0CKoUzTFWpyWcW4GsnyPDO5cC1\nInINcAbZMf1+4BwRmZfr7S8GXircMfe7jzqPReTvgf1RNNyYUhRmzJSjnODqBM/C7J1L3nNJ0TH9\n5b3LZ36WlPieGMrRubrT5hw0IN+gr6p3A3cD5Hr6H1fV1SLydeD3gMeAtcCOwn1FZAEgqvqGiPwO\ncEpVn4uw/cYElhnIsOOWHa4Tq0pRbnAtDKqFaZKF2i9unzVWv7x3uevVSv6JwZhC5eTp3wk8JiL3\nAXuBRwBE5FqgS1XvBd4EfEtEpoEfATeW2V5jSjZ8z3BkAR9KC67FJjy5pUnmO/Yvx+bs39HdwcE9\nB2s2gcvUH1GN1xB6V1eXjowEG2s1Jgy/G7ZhtS5q5fYf3e76u/xUSkdzazMnx0/Oel5Tuol3f/nd\ndK7uZEPTBvc7Y3nSC9KzTgzpBWlWbllpwzAGERlV1S6/59mMXGNKNP7S+MzPQ+uGGHl4pGjQLgz4\nANOT0+y+bTedqzs90yTzeU2YsqBvgrKgbxLhwUserNhru9WSCWPi2AQbUxs95wr4sQlTJgwL+qYh\nffaCz87qiVfKp+Z/ynXiVlilBnyIZsKUSQ4L+qbuZQYy7PzgTibf8L4JWilRBPxyRDVhyiSHBX1T\n1zIDGQZvGjw9VzwBJCXotNpqXaYkFvRNXRu+ZzhRAd+ydUy5LOibulStMftYEECzFT2tZ2/KZUHf\n1I0w9XLiKn1mmunJ6VCTxFZ9ZVWkgb5SK2KZ+mBB38ReuSmRtZY/iat/WX+o5Rch2oXWK7kilqkP\ngVfOMqYWKh3wJSW0La1cymPhrN2wOfVRFU9z2IpYxnr6JtYq3cPXKc32vHPj5lFLpVOzHgeZdZvP\nrb5POcMztiKWsaBvQrn/3Ps58fqJmcfzz5nPXa/dFeo1ggatjc0by25vYBUqQVUYTLs3dRetpFmo\nsHhaKcMz+cfb6+9sOa8lUHtM/bOgbwIrDPgAJ14/wf3n3h848LsFrR237MjWlX91YuYkAKCT8SoG\nWIrC2bJOYB5cM1jS6xUbnnEL+n7lmh1udYFMY7KgbwIrDPh+2924Ba2pk1MzC4mMHRorOSBWVAnD\nP8Vmy6bPTPvOIHYbzw87PONXrtkxdSK6ktMm3uxGbpVkBjL0L+tnQ9MG+pf1kxnI1LpJNVGPY8eS\nEvqm+0Ld8JWUuE6imul5BygZoVM657PiVWfHa3spx9s+q43Ngn4VOF/0sUPZMVVnHDaJX6Z6LA7m\n3Ex1Wyzci04pgzcOzgmaXj3v9Jlp9559wWflomsumrNCdbEriqDHW5qyL2qf1cZnQb8KGiVNbv45\n80NtdxMmcNZC66LWmeArKaHr1q6Zm6mdqztZuWVltscv2RmyLQuL3AB1CZpePe/JNya5fuv1rlcT\nk8cnGVwzyAPtD7D3kb2zh5kELl17qedN3KDHe/kHsye2RvmsGm82pl8FjZImd9drd5WdvZO/NODY\n4TFazmuJZJHyqHithOUIu64tZIPm42sfB4qnbPq9jutxUtj/xP6i7YXTx1ua5i6m3tHdwf4n9hdd\nuavePqvGmwX9KvD6otfjUEfY9Ew3TuCM20zbUiZpFQZVr6CpU8rO3p1cuvZS9m3d5xrcJ49PZito\nToW7Y+wXkAtPVPmCZvfU42fVuLPhnSpwu8ROch30DbKBDbIhVgEfKPn/o3N1J+sPrqdvuq/oDNrJ\n45Psf2I/K7es9HyOTmno4a9yAnKQ7J4kf1YbkQX9KnAbC05qedyoFyevtcJMF79e+tjhsex6uB5X\nFc5nI+hVR7kBuehVQu6zuvgdi3l87eNskA1snLeRoXVDJb+fqT1RjdcEmK6uLh0Zqe9KisZdNQN+\nc2tz6AlHbUvbWH9wfeDnuw6N+OTzO+/htm9hrfwH2h/wvd+x6tHy
KnB6FYBz2lmssqmVeo4XERlV\n1S6/51lP30SmWH53tXv4J984iaTDFSsbOzQWKi/ddWhEmZNS6cjvlQe5+rv681cXfX9JSckB1/m/\nmqk75NHO0S2jnq9h6Zz1yW7kmkjErWSviHDG2WfM6SmnWlK0vqnVM4MmTLs9h0ZyC56MHRqbuTHr\n1isudoM1//0Hbxx0vXpwK8YWxJyrDOdE5bJQi99wVbESECaebHjHRKKUOvFxFmSox29oJEpD64YY\n3TKKTimSEpb3Lp9TjC2oMO3eOG+jfzaRQN90X0ltMdEJOrxjPX0TiUbL4w7y97hVzKxUpkvP5p6S\ng3yhMPNGlvcu912tzNI560vgMX0RSYnIXhHZlXvcISJPi8h+EfmqiDS77JMWka0ikhGR50Xk7igb\nb+Kj0b74Qf6ees3KClO/p2dzD123dnmmolo6Z/0J09O/DXgeODv3+DPA51T1MRF5GHg/8FDBPr8P\nzFfVThFZADwnIn+pqgfLbLeJmUYa2gkTyPzG5eMo7BVK/lWGra9b/wIFfRFZDPQAm4CPiYgAVwLv\nyz1lK/BJ5gZ9Bc4UkXlAC3AS+Gn5zTZx0QiLlbcsbKG5tTkxgaxwFnGYv7keT3JmtqA9/X7gDuCs\n3OOFwOuqeir3+Ahwgct+fwVcB/wYWAB8VFVfLXySiPQCvQBLliwJ3HhTG/k3FetdekGaqz9/deIC\nmQXv5PIN+iKyAnhZVUdF5Apns8tT3SLAZcAUsAg4F/iuiHxHVV+ctaPqFmALZLN3gjffFPK6/I7q\nsnxj88aGWNEK4jO5qF6HTOq13UkXpKd/OXCtiFwDnEF2TL8fOEdE5uV6+4uBl1z2fR/w16o6Cbws\nIk8BXcCLLs81ZfLKld/75b0cePLAzGm51Bz6+8+9v64Cfp9m0whrEZyCvmfc5jcEVa/tNiHz9HM9\n/Y+r6goR+Trwjbwbuc+o6uaC598JvBW4hezwzj8BN6jqM17vYXn6pQubKx8mn7wea+Y4Qb/agpRY\ncFQz1z9K9druRlaNPP07gcdE5D5gL/BI7o2vBbpU9V7gQeDLwD+THRL6crGAb8oTNlfe7/mZgUx2\nwfIY1bsvR37v220wsk/7IrkqCLN4eb2utVCv7TYhg76q7gH25H5+keyYfeFzvgl8M/fzONm0TVMF\nxRbo8Hq+l3rs2ecr7OVnBjJsv3k705PTnvsU/s1jh8bYfvN2INyQRZiAWOu1Fko9yUXVbrsvUH02\nI7eBdG/q9qzT4vX8QvUY7NuWtjH+8jhTE1Mz26L6O6Ynp9n5wZ2hAlGYgFjNWb2FyhmXj6Lddl+g\nNqzKZgPpXN0ZOOCnz0zP+WLVW8Bvas5+fMcOjc0K+FGbfGOSDXK6cmixaqIQbtGcWs7qLWc9XL92\n+x2jct/flM4KrjWYIDdzm9JNvPvL754VWOot4PvVra8qga4Pdc2qjVMPwxaea+KWWUDN7UZ2U7qJ\n+WfPZ+LViZnj4XlVagXcShL0Rq4F/Qbjt+Zpy8IW18lIdRX04xTwHQKrvlLegibV8OAlD3L0uaOh\n9glb1TNIxyO9IM28lnmuSQKWAVQaW0QloWZddsNMoay2pW2senQVdxy9Y05g2ti8sertLEvcAj6A\nwuCawVCLsFRbKQEfsjX1Rx4aCbRMYmYgEyiZwOmU2NrR1Wc3chvQnNoqLjNP66pnX0dKzfiphlIC\nfr7RLaNFe/vOVWZQbr38eS0WkirNjnCdKjZm7JYVMXjjIINrBmdWdDKVMz05ze7bdscu6JfLr9aS\n6/KRIU0cm4jtSbNRWNCPqbBBPT/VzXPtVhqrBHKcufViC4dX2i9u58PPfrgq7QkyNFOuYhOzUs0p\npk4Gy7Bq1JNmXFjQj6FSgnr+jE+bFRkPfkNoR587ymcv+Cy3/+j2irYjyvLX267axk3fucn1d57z\nE3LDi8VmQxcqZRZ4PWRMxYEF
/RjyCuqDaweLBnVne9iZuaZ2xl8aZ2jdUGRLIboZ3TIa2WsdGD7g\n+btiE7bySzlXYj3lWk70uv/c+znx+omZx/PPmc9dr91V0fcsh2XvxJBnT30qO0Tgt9yd2+QgE18j\nD49UNOOnWuseBJ1o1r2p23P5RUfLwpZQ712riV6FAR/gxOsnuP/c+yv6vuWwnn4MtZzX4nl5e/S5\no3Td2uV6uX7RNRcBp3s2g2sGK9dIEx3FtRhbVCQlVQ38fn+H8/udH9zJ5Btzb/ymmlNc/fmrQ71v\nrQrAFQZ8v+1xYD39OrT/if2+220ss75UMjgt710e2Wt1dHdE8jqdqzv5xPgn6NM+Vj26atbVwXVf\nui705zfMYu9JZz39GJp4tfhNLK/xULuBW7/alrS5Tp7ymkEdhnO/IOwSlx3dHbPG8Du6Ozxv4pYj\niqUbz7vwvDnfC5vo5c6Cfgw1n9nMyfGTrr9rXdTK+I/HXTMg8ns1NvmqvnidyCeOTTC4ZtBzqC5o\niYSezT30bO4JfBN1/jnzKxLgK2Fo3ZDrDebF71hc8Sve+efMdx3KmX/O/Iq+bzlseCdmhtYNFQ34\nqXTKs0hVnHs1TlmIqLUuaq3I69YLp0TCtqu2BXp+kM9I3LNPCnllJx3cc7Di733Xa3fNCfBxP37W\n068yv1xiz/Q6gdt/dHu2MqIbZdbkrThxLrMrcWP59h/dHulVTWp+iqkTlSvTXCkHhg+QGcgEuola\n7P/Bb4nJoXVDM8NEYQuxVYrXkFW1bl7HOcC7saBfRUFyiT0/qHo6XdNrAkz+e8SFpIRL117qG2xK\nETatrxgngC25fEnRhWi8MqfiYHDNYNnDGfctuM91bYLWRa289bq3zvrbnasMoKaB3ys7yS8tNKks\n6FdRmLVT3Rx97iiti1pJL0h7rlgURf2TKOmUsm/rPpZcviTy1544NkFmIMOqR1eVdEJpbm1mxcMr\nXI/943/4OHrqdCDJL5kw8vBIPCt9kruXk4KWc1pm1a7P/xtbF7Uy/tK46/5ei9GMvzTuebIbeXik\npkF/ee9y17ZFmbXUSGxMv4qiyCUef2m86ASYOGbwTB6frNicge23bKdzdSerHl0VeJ+m5iZWPbqK\nu39296xg6Kz2NLhmcE7Pcezg2MywWezTAKdyZQw0V2xvzSAbZAMPXvIgkB0SK7wXUta9Ea1ObR8v\nPZt76Lq1a6ZnLymh69Yu3xNRkNW9GpEtolJFXpkT+YtGBBmfdht3jbK+Sr3JPx7brtpWUpqh3+Iz\ncPr/KTOQqduJb5IW7j15r+vvyrk3Iinh3lPurxtHbv/f6QXpqi1VWQlBF1Gx4Z0qcqtNgmR7Y/3L\n+jnvwvNKet0kB3xg1g3MoGmGhTck0y1p32Ex5yqqc3Unuz60yzPLKl9hrnut6aTy4CUPRl7ds1o3\nTYPyS5god6i1nlnQr6JZi5scGpu17N/YobFA+dPtF7fP2Ra3gF/Naf9wutxE0C9r4UlSpzRQAHeG\ndTIDGU6+4f/89JlpXn3h1UBtqqajzx2NfB5HkJumfoE4qiqZQRImalW2IQ5sTL/KOld3sv7g+uyY\nfAlxsbCHFsdJWPPOqH5fIkxhrVKqTqYXpLnomot4oP2B7EnG5/+uKd3E9OR0YqqdLnzLwqK/dwLx\n2KGxWfcaHmh/gMxAxvX3O3t3ljTOHqT4WpLLNlhPv0ZK6VEUTnCKY8BvSje5FtGqtPwhsoN7Ds4M\n2yx8y8KylwmE7ISbvY/sDbQQSNvSNk6OnwxWEz6Oi7yX4OhzR/l066dZ+RfuY+JeWWUTxybY2buT\neS3zAg+3+F0RBOnFFysD3eisp18jpfQo4v6BbGrO9m5rZezQGAeGD8wMLemURhLwIZs15RvwJZvH\nv/7get/6STMaIOA7Jt+Y9OydF+vkTB6f9DxBFu4X5IogSC8+aBnoRhQ46ItISkT2isiu3OMOEX
la\nRPaLyFdFpNlln9Ui8v28f9Mi8rYo/4B6FbbmfcvClth/IKdP1i7gx4LC3kf2khnI0HJedBPH6olX\nDftSh00K9wsydOP23XLrxTtDrX3Tfaw/uD7236+ohOnp3wY8n/f4M8DnVPUi4DXg/YU7qOqAqr5N\nVd8G3AgcVNXvl9PgRuHW0+jo7kCa5t4QSy9Ih64vbmpj6uQUuz60ixM/jW899Upz69X7dXJaFrYE\nCtRBhm6S3IsPItCYvogsBnqATcDHRESAK4H35Z6yFfgk8FCRl3kv8Jclt7QBeZWUtbU+61uQTKBK\nqnWpCLdevfP53X3b7jlDOfmdGr/PvWcZkoL3jKJcc6MKeiO3H7gDOCv3eCHwuqqeyj0+Alzg8xp/\nAFzn9gsR6QV6AZYsiX66fr2xD2yDEuib7mPjvI3lp7SmAJdbDM2tzSy5fAnPfu3ZkhYXj8LJ8ZOu\nxd+cz3WxTo3f5z7JN2Cj4ju8IyIrgJdVNT/PzS0p1/NTLCJvB46r6j+7/V5Vt6hql6p2nX/++X5N\nMjl+FRFNjXikrDu90UhqwnjcUz45fpKdvTu55D2XlPzSkhLfYnaplpTncM3EsQm237zdM92ynLF0\nG7opX5Ce/uXAtSJyDXAGcDbZnv85IjIv19tfDLxU5DVuwIZ2KsIJ/GHSN4sV3DLlaUo38Wsf+DX2\nbd3n2Rt1asJUqnDb5PFJzyU1g0i3pLNXCR7ppE69+Jkeu8twy/TkNLtv212RYGxXwuUJVXtHRK4A\nPq6qK0Tk68A3VPUxEXkYeEZVN7vs0wQcBn5LVV/0e49Grr1TaX7lGFoWtnDq56dqkkefBOkz0zN5\n6qXcl6n2vIv2i9tZ9tvLTi+jKNCUamL6VF4WVi7wty31/huKtduuRqunGrV37gQeE5H7gL3AI7k3\nvhboUlWn+tJvAUeCBHxTHmdJPDf1XCSsXky+Mcnhpw7P9EQr1hstZ0JX7r5CPucz41oQUGcXBDT1\nL9TkLFXdo6orcj+/qKqXqeqFqvr7qnoit/2beQHf2ec3om22CWvwRgv4ZWki0LyKkYdG5pTpjaKE\nb/rM9MwYdteHujzvG4BPO4ucLEqtR+M1/h/lIjcmOlaGoYFkBjKzUuJaFraczu9voJmftdD25uzw\nhjNkUzR45hX4AnyLfzn6tM91qKRwiGTbVds8398Zhnl87eOhV5MKmg5Z6OrPX82OW3bMmrGcak7V\nbG5J/r0Gp/hfseGppLGg3yAyAxm237x9VhmEiWMTbP/D7UV7hSYYJxg6wxx+aZeTxyfZfdtumlub\nQ5XwDTIGXqxUs9O+w08dDr2aVKnpkLOqx9Z4bklhhU3n/6jYyTZpLOg3iOF7hl3r3sy6KWfKkh80\nvJboyzdxbMK7pkyFq2864/TOmsgkAAAOg0lEQVRhFjEvJ3jHJaOm2HKhSamX78eCfoNIQh3wWsuv\n8VJOSmQxUc7GLnZj30tcgnep/L4H9j2xKpsNIwl1wONg7NAY22/ePrunHtG3KGhN+Y7uDtf9vbYn\ngXOz3O/elX1PLOg3jO5N3TSl5/53Ns1rct1uSjdnGK2EETS3G6pBKkhCdknIwgAfdC3gRjTrZFmE\nlWvIsuGdBuFW0Co/e8dr5qSpDbcbqmFSJpMa4N0UG8ePU/bOfQvuY2oiL8OpJcWfHv/TqrfDgn6d\nKxwDvuQ9l7D/if2MHR6jubWZw08d5pmvPFPzyo9JISlBpxVp8l4nuKO7w3WsvdSUyaTzHKcXuPfU\nve6/q7LCgA8wNTHFfQvuq3rgt+v+OuY2Bjzy0Micx9UO+B3dHfRpH33ax6pHV1X1vWtNp5W+6T6u\n33r93ElSuZW1vHrpQRf/MLPVw3q3hQHfb3slWU+/jhW7rK2lA8MHeKD9gZqV9q0lJ9CUkv4Yp3z3\nemLllsOxoF/H4jxG36gBP70gzaVrL3WtV18YaEpJf6z3lM
lasJNlOBb069C2q7YVnZVpotOUbmL+\n2fOZeHViVjDp2dxjK5zFSNxPlqmWlOtQTqolVfW2hCqtXA1WWtlbZiDDjg/sYOrn1RkHdLJ/vKb0\nNyrnZqwFchOlSmfvVKO0csNw67FB9S8XCwum1drEsQmG7xmme1N3NiMoxsNJxbQtbeOnP/wpOu3f\nwUkvSNtKTKYiapGe6SbxPf3CAk2QvaQXkVlVAysdDDIDGc/KiGa21kWtTJ2YCnxy9KpeOYsQ6uRu\nQzsmbqynH4BXoHUrXFbpYk27b9ttAT+g8ZfGkaZwpUPblrrnwDsKFxYpprCjYBUcTT1JbNB3vrhh\nAm2lijVlBjKxGdKpF0GGahx+6aPFasy79eiLlUuwoG/iLrFBv5Qc90pM9nBOPqZy/E6oXjXmvXr0\nXp8bq+Bo6kFiZ+T6fUGb5s0+NJWa7BGXCVbtF7fPzKAtttxey8KWQMsG1ouuW7s8yw979ei9rgzi\nNAPUGC+JDfq+X1DJrfGZW5e0Ujdx45IRc+wHx8gMZOhc3cnKLStd1zdNL0hz9eev9vx9JEKu8iUp\nmfk/KkWxevNeHQOdUiuXYOpWYoO+W52TfNOT0zS3NtM33cf6g+srEvBLWSC7UnRaZ5XwbW5tBk6P\nd+ef+DpXd3LH0TtY9eiqkoOtl77pvlAnlOu3Xj/zfxR1WzxruuSORdvStop3CoyJWqJTNvMXUHYl\n4bI6wupf1h+bnj4AAqu+sspz3Lq5tZmTb5yck6KYGcgweNNgSXXlC3V0d/CrN/9q4PTV/DVlMwOZ\nOQt0h9m/kFs6r+Xxm7iylM0AnF6rV/Ct9Bht3G78tS1pK3qPwanWOXZojME1gwyuGQRyVwMRLcV7\nYPjATBVKv4lqhVcEbmsKpM9MM/nzSXA5D/itNGU1XUwjSmRPvzAN76JrLmLf1n1V79HFraff0d3B\ngScP+C45V2ntF7fz4Wc/DFB0UtWqR1cF/v8prFeU5JWmTGOynr4HtzS8fVv3cenaS2cWHwnboyt1\ndmb3pu6Z3nIU2pa2cfzocSbfKC0b6MDwgWzPuMT9o3L0uaMzPzsrHxWSJgl1QrYAb0xW4oK+Vxre\n/if2s/7g+tCvV87szM7Vnfzdp/9uVpArSROs2pbt9W5o8ik34KPWAb/Q8t7lrsXeln/QPbfeGIeV\nynAXOHtHRFIisldEduUed4jI0yKyX0S+KiLNHvv9ioj8g4g8KyIZETkjqsaXIsw6pEEEXczay4ef\n/bDv2LKrFDOZI07Ah9rmiheb2Vqqns09dN3aNfPakpKiufXGgPuqcjt7d8YqY65WwvT0bwOeB87O\nPf4M8DlVfUxEHgbeDzyUv4OIzAMeBW5U1X0ishCoaVcy6nVIoziJOEMPxerkB73H4LaKUDU47Yti\nuKr94vZZj3s291iQN6FYqQxvgYK+iCwGeoBNwMdERIArgfflnrIV+CQFQR94F/CMqu4DUNVjEbS5\nLFEvrRblSSR/3LmcS9N5LfOKBn1pklC1a7xfCNDs1Ub3pm4OP3W4/NeEmZu4xgTlW0U1J24Zc7UQ\ntKffD9wBnJV7vBB4XVVP5R4fAS5w2e+XABWRbwHnA4+p6gOFTxKRXqAXYMmSJcFbX4Ko0/AqtT5n\nKSsBuZaJntfE9KnT+ZTNrc2seHhF2XX78zNnfOc7lMnGZk0xQQM+WKkMCBD0RWQF8LKqjorIFc5m\nl6e6dR3nAb8J/DpwHBjOpRXNGvBW1S3AFsimbAZvvj+vgBFV0IhTLrfbJe30qWnalrbNuUk9eGO4\nYZim5iamJ6ddJ2ZFOZxUeF/AyhjHQ1Qn3lqewK1URlaQnv7lwLUicg1wBtkx/X7gHBGZl+vtLwZe\nctn3CPB/VPUogIg8AfwaEOwuZ5mqFTDisj5nmPsLXsNSXnRKWfWVuXnxUReMy6946bXeQf6N8jic\nbBvR0LohRh4ece3Klf
o9qsUJvG1pm30+Cvhm76jq3aq6WFWXATcAT6rqauBvgd/LPW0tsMNl928B\nvyIiC3I3dX8beC6SlgdQbmZNvfGsFeOy3a/2UCGdUgZvHGSDbKB/Wf9MFkSUY6TtF7fP3LD1W+/A\nmRWcn50xuGaQBy95MLL2JFFmIMOnWz+dTZMtcs1dyveoFt/H9QfXV7R+Vj0qp+DanWRv6r5Adoz/\nEQARuVZENgKo6mvAnwH/BHwf+J6qDpXX5OCiTs+MO7dA7nVJ61TTzC8a5ltALRcE8tPfohwjzZ+v\nUOoVxNHnjrLtqm2RtSlJZnriAedqhP0eJe37GFehJmep6h5gT+7nF4HLXJ7zTeCbeY8fJZu2WXVR\np2fGXdj7C17DUkHG6J0eWqVSRMsJBF5pr6a4sCfasN+jSn0fvdZALlZML8kaekZupTJr4qzc+wvO\nvkGqXI4dGptzoim3bs/QuiF6NveEvudgyr9JGuZEW8r3qJLfRwvwwTV0PX23IQwri+uvc3Un12+9\n3nfM38m06VzdOTN2Wi6n5ELRew7RT/ytucxAhv5l/Wxomn3PJMz+5c5ADdrjLvV7ZN/HeGjonj7E\nJ7Om3szqwXv0uMMsKl/u+zuF19qWZquifu+L32N60r2ec0llLWooiqyWKGagFhuqc+Z3lPtdsu9j\n7TV80Del811vIOKVqrze382Sy5e4TjCrx5LJUQTsKG6SxmnOiakcC/rGVxzvjTRSjzGKgB3VTdJG\nOq7GXUOP6ZtohBmL7brVdw0HUyDM/AovYdJ1TbJZT98EErQH6EyuGt0yik4pkhKWXbGMV194ddaQ\nAeBakTOJWRhRXEnZ0IwJKpHLJRoTN1ZUzpTLlks0po7YWLqpFgv6VWI9OWNMHFjQrwIrD2yMiQvL\n3qmCpFX7NMbElwX9KrDqgsaYuLCgXwVR5GEbY0wULOhXgU2cMcbEhd3IrQKbOGOMiQsL+lViedjG\nmDiw4R1jjEkQC/rGGJMgFvSNMSZBLOgbY0yCWNA3xpgEsaBvjDEJYkHfGGMSxIK+McYkiAV9Y4xJ\nEAv6xhiTILFbI1dEXgEORfBS7cDRCF6n0dlxCsaOUzB2nPxV6hgtVdXz/Z4Uu6AfFREZCbJIcNLZ\ncQrGjlMwdpz81foY2fCOMcYkiAV9Y4xJkEYO+ltq3YA6YccpGDtOwdhx8lfTY9SwY/rGGGPmauSe\nvjHGmAIW9I0xJkHqOuiLyFdF5Pu5fwdF5PsFv18iIuMi8nGP/T8iIi+IiIpIe3VaXX0RHKcOEXla\nRPbnXqu5Oi2vHq9jJCKX5W3fJyLXe+x/pYh8T0T+WUS2ikhDLkUawXHqzh2n74vI/xWRC6v7F1RH\nBMfpu3nPe0lEtkfWtkYZ0xeRzwJjqroxb9s3gGngaVX9ny77/CrwGrAH6FLVhp9UUuJx+howqKqP\nicjDwD5Vfahqja6y/GMkIguAk6p6SkR+EdgHLFLVU3nPbyI7obBbVf9VRDYCh1T1kZr8AVUS9jjl\n9vlX4DpVfV5E1gGXqeofVr3xVVTKcSrY/xvADlXdFkV76rqn7xARAd4D/GXetncDLwLPeu2nqntV\n9WDFGxgTpRyn3D5XAn+V27QVeHdlW1o7hcdIVY/nfSHPANx6SQuBE6r6r7nH3wb+S6XbWkslHidy\n28/O/dwGvFTJdtZaGcfJ2f8sst+/yHr6DRH0gXcCP1HV/QAiciZwJ7Chpq2Kn1KO00Lg9bwP6hHg\ngoq2srZmHSMAEXm7iDwLZIAPufTKjgJpEXFmWf4e8OaqtLZ2SjlOAB8AnhCRI8CNwP1VaW3tlHqc\nHNcDw6r606gaFPtxRxH5DvALLr+6R1V35H5+L3m9V7JB7HOqOp490Ta+Ch4nt1/U5ZhgiccIVX0a\nuEREfhnYKiK7VfXneb9XEbkB+JyIzAf+Bij2RY61Sh2nnI8C16jq0yLyJ8CfkT0R1J0K
HyfHe4Ev\nRtVmpwF1/Y/siesnwOK8bd8FDub+vQ68CnykyGscBNpr/bfE8TiRDfpHgXm5x+8AvlXrv6dax8jl\nOX9L9v5Psdd5F/C1Wv89cTtOwPnAv+U9XgI8V+u/J27HKe93C4FjwBlRtiv2Pf0ArgJ+oKpHnA2q\n+k7nZxH5JDCuqn9eg7bFSUnHSVVVRP6W7JDFY8BaYAeNac4xEpEO4IeavfG2FHgL2ZPkLCLyJlV9\nOdfTvxPYVKU210Kpx+k1oE1Efkmz9z9+B3i+Sm2uhZI/Tzm/D+xS76uAkjTCmP4NFFw+FSMiT4jI\notzPf5wbW1wMPCMi0V5GxUvJx4lsEPuYiLxAtvfRqFkpbsfoN4F9uZS7x4F1msvyKjhGfyIizwPP\nADtV9clqNboGSjpOmh27/iPgGyKyj+yY/p9Usd3VVs7nyWv/sjVMyqYxxhh/jdDTN8YYE5AFfWOM\nSRAL+sYYkyAW9I0xJkEs6BtjTIJY0DfGmASxoG+MMQny/wHyr0+0pd0UTQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x117a9da58>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Re-plot the coordinates (longitude on x, latitude on y) after the bounding-box filter.\n",
    "plt.scatter(\n",
    "    train_data[\"longitude\"].values,\n",
    "    train_data[\"latitude\"].values,\n",
    "    color='purple',\n",
    ")\n",
    "plt.title(\"Location\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "曼哈顿华尔街的位置是 (40.70696, -74.00979)，计算所有点与该处的距离作为 feature（如果城市有多个中心，这样处理特征就有局限性了）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 416,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from math import radians, cos, sin, asin, sqrt\n",
    "\n",
    "\n",
    "def geodistance(x, lng2, lat2):\n",
    "    \"\"\"Great-circle (haversine) distance in metres from a listing to a reference point.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : mapping with 'longitude' and 'latitude' entries in decimal degrees\n",
    "        (for example a DataFrame row passed in by ``DataFrame.apply(..., axis=1)``).\n",
    "    lng2, lat2 : longitude and latitude of the reference point, in decimal degrees.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Distance in metres on a sphere of radius 6371 km.\n",
    "    \"\"\"\n",
    "    lng1, lat1, lng2, lat2 = map(radians, [x['longitude'], x['latitude'], lng2, lat2])\n",
    "    dlon = lng2 - lng1\n",
    "    dlat = lat2 - lat1\n",
    "    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2\n",
    "    # Floating-point rounding can push sqrt(a) marginally above 1 for (near-)antipodal\n",
    "    # points, which would make asin raise ValueError. Clamp to 1.0; sqrt(a) is passed\n",
    "    # first so that a NaN input still propagates as NaN.\n",
    "    central_angle = 2 * asin(min(sqrt(a), 1.0))\n",
    "    return central_angle * 6371 * 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 417,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#分别对训练样本和测试数据计算距离\n",
    "train_data[\"dis2MHD\"]=train_data.apply(geodistance,axis=1,args=(-74.0097,40.70696))\n",
    "test_data['dis2MHD']=test_data.apply(geodistance,axis=1,args=(-74.0097,40.70696))\n",
    "\n",
    "train_data = train_data.drop(['longitude', 'latitude'], axis=1)\n",
    "test_data = test_data.drop(['longitude', 'latitude'], axis=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 418,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10         5725.808989\n",
       "10000     10406.936420\n",
       "100004     3602.496322\n",
       "100007     6306.161401\n",
       "100013    13983.395510\n",
       "100014     4038.424309\n",
       "100016    11106.710333\n",
       "100020     4145.525319\n",
       "100026    14024.634513\n",
       "100027     3361.889508\n",
       "100030     9415.514205\n",
       "10004     16424.450188\n",
       "100044     5407.166248\n",
       "100048     7469.849647\n",
       "10005      6857.828130\n",
       "100051     5076.495211\n",
       "100052     3451.787249\n",
       "100053    18799.018495\n",
       "100055    11034.003258\n",
       "100058     3816.137265\n",
       "100062     4772.772121\n",
       "100063     4723.681115\n",
       "100065     5158.510016\n",
       "100066      143.466797\n",
       "10007      3822.683782\n",
       "100071     6834.676885\n",
       "100075     8872.832149\n",
       "100076     7269.742468\n",
       "100079    16210.944433\n",
       "100081     8743.849832\n",
       "              ...     \n",
       "99915      4915.058937\n",
       "99917      7020.112381\n",
       "99919      8635.139324\n",
       "99921     10115.893909\n",
       "99923      8989.461843\n",
       "99924      6651.535341\n",
       "99931      2651.444428\n",
       "99933       459.117739\n",
       "99935      3431.624396\n",
       "99937      9415.514205\n",
       "9994      10405.276799\n",
       "99953      7020.112381\n",
       "99956       302.946845\n",
       "99960      1519.345439\n",
       "99961      2857.598544\n",
       "99964      8579.967479\n",
       "99965      9407.079823\n",
       "99966      8104.486293\n",
       "99979      8431.297229\n",
       "99980       236.648385\n",
       "99982      6754.643339\n",
       "99984      8471.823858\n",
       "99986      3208.832919\n",
       "99987      9651.714630\n",
       "99988      3166.992396\n",
       "9999       4732.672477\n",
       "99991       662.781741\n",
       "99992      6137.614021\n",
       "99993        52.337753\n",
       "99994     19720.798190\n",
       "Name: dis2MHD, Length: 49278, dtype: float64"
      ]
     },
     "execution_count": 418,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data['dis2MHD']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## created"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "分析created变量，根据info可知它是时间字段，包括年月日，时间，例如，2016-06-24 07:54:24。简单判断，创建的时间部分对计算结果没有意义，但日期可能是有效的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 419,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data['Date'] = pd.to_datetime(train_data['created'])\n",
    "train_data['Year'] = train_data['Date'].dt.year\n",
    "train_data['Month'] = train_data['Date'].dt.month\n",
    "train_data['Day'] = train_data['Date'].dt.day\n",
    "train_data['Wday'] = train_data['Date'].dt.dayofweek\n",
    "train_data['Yday'] = train_data['Date'].dt.dayofyear\n",
    "\n",
    "train_data = train_data.drop(['Date', 'created'], axis=1)\n",
    "\n",
    "test_data['Date'] = pd.to_datetime(test_data['created'])\n",
    "test_data['Year'] = test_data['Date'].dt.year\n",
    "test_data['Month'] = test_data['Date'].dt.month\n",
    "test_data['Day'] = test_data['Date'].dt.day\n",
    "test_data['Wday'] = test_data['Date'].dt.dayofweek\n",
    "test_data['Yday'] = test_data['Date'].dt.dayofyear\n",
    "\n",
    "test_data = test_data.drop(['Date', 'created'], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## description "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 420,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count        49278\n",
       "unique       38182\n",
       "top               \n",
       "freq          1642\n",
       "Name: description, dtype: object"
      ]
     },
     "execution_count": 420,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data['description'].describe() "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "简单丢弃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 421,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data = train_data.drop('description', axis=1)\n",
    "test_data = test_data.drop('description', axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## display_address"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "显示地址与经纬度的含义类似，都是用来表示位置特征，丢弃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 422,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data = train_data.drop('display_address', axis=1)\n",
    "test_data = test_data.drop('display_address', axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## manager_id "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "将manager分为几个等级 top 1%， 2%， 5， 10， 15， 20， 25， 30， 50，"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 423,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "managers_count = train_data['manager_id'].value_counts()\n",
    "\n",
    "train_data['top_10_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 90)] else 0)\n",
    "train_data['top_25_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 75)] else 0)\n",
    "train_data['top_5_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 95)] else 0)\n",
    "train_data['top_50_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 50)] else 0)\n",
    "train_data['top_1_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 99)] else 0)\n",
    "train_data['top_2_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 98)] else 0)\n",
    "train_data['top_15_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 85)] else 0)\n",
    "train_data['top_20_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 80)] else 0)\n",
    "train_data['top_30_manager'] = train_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 70)] else 0)\n",
    "\n",
    "train_data = train_data.drop('manager_id', axis=1)\n",
    "\n",
    "managers_count = test_data['manager_id'].value_counts()\n",
    "\n",
    "test_data['top_10_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 90)] else 0)\n",
    "test_data['top_25_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 75)] else 0)\n",
    "test_data['top_5_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 95)] else 0)\n",
    "test_data['top_50_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 50)] else 0)\n",
    "test_data['top_1_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 99)] else 0)\n",
    "test_data['top_2_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 98)] else 0)\n",
    "test_data['top_15_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 85)] else 0)\n",
    "test_data['top_20_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 80)] else 0)\n",
    "test_data['top_30_manager'] = test_data['manager_id'].apply(lambda x: 1 if x in managers_count.index.values[\n",
    "    managers_count.values >= np.percentile(managers_count.values, 70)] else 0)\n",
    "\n",
    "test_data = test_data.drop('manager_id', axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## photos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 424,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data['photos_count'] = train_data['photos'].apply(lambda x: len(x))\n",
    "train_data.drop(['photos'], axis=1, inplace=True)\n",
    "\n",
    "test_data['photos_count'] = test_data['photos'].apply(lambda x: len(x))\n",
    "test_data.drop(['photos'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## street_address"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "与经纬度高度相关，删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 425,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data.drop(['street_address'], axis=1, inplace=True)\n",
    "test_data.drop(['street_address'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 426,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_map = {'low': 2, 'medium': 1, 'high': 0}\n",
    "train_data['interest_level'] = train_data['interest_level'].apply(lambda x: y_map[x])\n",
    "\n",
    "y_train = train_data.interest_level\n",
    "train_data = train_data.drop(['listing_id', 'interest_level'], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "描述特征文字长度 特征中单词的词频，相##当于以数据集features中出现的词语为字典的one-hot编码（虽然是词频，但在这个任务中每个单词）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 427,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data['features_count'] = train_data['features'].apply(lambda x: len(x))\n",
    "train_data['features2'] = train_data['features']\n",
    "train_data['features2'] = train_data['features2'].apply(lambda x: ' '.join(x))\n",
    "\n",
    "c_vect = CountVectorizer(stop_words='english', max_features=200, ngram_range=(1, 1))\n",
    "c_vect_sparse = c_vect.fit_transform(train_data['features2'])\n",
    "c_vect_sparse_cols = c_vect.get_feature_names()\n",
    "\n",
    "#train_test.drop(['features', 'features2'], axis=1, inplace=True)\n",
    "train_data=train_data.drop(['features', 'features2'], axis=1)\n",
    "#hstack作为特征处理的最后一步，先将其他所有特征都转换成数值型特征才能处理\n",
    "train_data_sparse = sparse.hstack([train_data, c_vect_sparse]).tocsr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 428,
   "metadata": {},
   "outputs": [],
   "source": [
    "listing_id = test_data.listing_id\n",
    "test_data = test_data.drop('listing_id', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 429,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test_data['features_count'] = test_data['features'].apply(lambda x: len(x))\n",
    "test_data['features2'] = test_data['features']\n",
    "test_data['features2'] = test_data['features2'].apply(lambda x: ' '.join(x))\n",
    "\n",
    "c_test_vect = CountVectorizer(stop_words='english', max_features=200, ngram_range=(1, 1))\n",
    "c_test_vect_sparse = c_test_vect.fit_transform(test_data['features2'])\n",
    "c_test_vect_sparse_cols = c_test_vect.get_feature_names()\n",
    "\n",
    "#train_test.drop(['features', 'features2'], axis=1, inplace=True)\n",
    "test_data=test_data.drop(['features', 'features2'], axis=1)\n",
    "#hstack作为特征处理的最后一步，先将其他所有特征都转换成数值型特征才能处理\n",
    "test_data_sparse = sparse.hstack([test_data, c_test_vect_sparse]).tocsr()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征处理结果保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 430,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "#存为csv格式方便用excel查看\n",
    "train_data = pd.concat([train_data, y_train], axis=1)\n",
    "test_data= pd.concat([listing_id,test_data],axis = 1)\n",
    "train_data.to_csv('RentListingInquries_FE_train.csv', index=False)\n",
    "test_data.to_csv('RentListingInquries_FE_test.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
